From ca11480c3458edbaa2eea3c2ff8a60856f821b85 Mon Sep 17 00:00:00 2001 From: wuweikang Date: Fri, 7 Aug 2020 15:06:16 +0800 Subject: [PATCH] runpackage sync C75B050 --- RELEASE.md | 1 + inc/common/opskernel/ge_task_info.h | 7 + inc/common/util/error_manager/error_manager.h | 25 +- inc/common/util/platform_info.h | 2 + inc/common/util/platform_info_def.h | 14 + inc/external/ge/ge_api_types.h | 14 +- inc/external/graph/operator_reg.h | 1 + inc/framework/common/debug/log.h | 2 +- inc/framework/common/ge_inner_error_codes.h | 23 +- inc/framework/common/types.h | 13 + inc/framework/executor/ge_executor.h | 64 +- inc/framework/memory/memory_api.h | 56 ++ inc/framework/omg/omg.h | 4 - inc/framework/omg/omg_inner_types.h | 1 + inc/framework/omg/parser/model_parser.h | 110 ++++ inc/framework/omg/parser/op_parser.h | 92 +++ inc/framework/omg/parser/parser_api.h | 31 + inc/framework/omg/parser/parser_factory.h | 138 +++++ inc/framework/omg/parser/parser_inner_ctx.h | 43 ++ inc/framework/omg/parser/weights_parser.h | 74 +++ inc/graph/compute_graph.h | 5 +- inc/graph/debug/ge_attr_define.h | 27 + inc/graph/detail/model_serialize_imp.h | 3 + inc/graph/op_desc.h | 6 +- src/common/graph/compute_graph.cc | 73 ++- src/common/graph/ge_attr_define.cc | 29 + src/common/graph/ge_attr_value.cc | 26 +- src/common/graph/graph.mk | 2 + src/common/graph/model_serialize.cc | 119 +++- src/common/graph/node.cc | 34 +- src/common/graph/op_desc.cc | 141 ++--- src/common/graph/ref_relation.cc | 35 ++ src/common/graph/shape_refiner.cc | 76 ++- src/common/graph/utils/ge_ir_utils.h | 10 +- src/common/graph/utils/node_utils.cc | 33 +- src/ge/CMakeLists.txt | 25 +- src/ge/common/CMakeLists.txt | 1 + src/ge/common/formats/utils/formats_trans_utils.h | 1 - src/ge/common/ge/tbe_plugin_manager.cc | 7 +- src/ge/common/ge/tbe_plugin_manager.h | 2 +- src/ge/common/ge_common.mk | 1 + src/ge/common/helper/model_helper.cc | 51 +- src/ge/common/helper/om_file_helper.cc | 31 +- src/ge/common/model_parser/base.cc | 
29 +- src/ge/common/model_parser/graph_parser_util.cc | 483 +++++++++++++++ src/ge/common/model_parser/graph_parser_util.h | 68 +++ src/ge/common/profiling/profiling_manager.cc | 55 +- src/ge/common/profiling/profiling_manager.h | 5 + src/ge/common/types.cc | 3 + src/ge/common/util.cc | 2 +- src/ge/engine_manager/dnnengine_manager.cc | 22 +- src/ge/executor/CMakeLists.txt | 1 + src/ge/executor/ge_executor.cc | 308 +++++++--- src/ge/executor/module.mk | 5 + src/ge/ge_inference.mk | 7 + src/ge/ge_local_engine/engine/host_cpu_engine.h | 3 +- .../ops_kernel_store/op/ge_deleted_op.cc | 1 + src/ge/ge_runner.mk | 7 + src/ge/generator/ge_generator.cc | 66 ++- src/ge/graph/build/graph_builder.cc | 39 +- src/ge/graph/build/graph_builder.h | 1 - src/ge/graph/build/memory/block_mem_assigner.cc | 209 +++++-- src/ge/graph/build/memory/block_mem_assigner.h | 8 + src/ge/graph/build/memory/graph_mem_assigner.cc | 272 ++++++--- src/ge/graph/build/memory/graph_mem_assigner.h | 21 +- src/ge/graph/build/model_builder.cc | 26 + src/ge/graph/build/stream_allocator.cc | 39 ++ src/ge/graph/build/stream_allocator.h | 1 + src/ge/graph/execute/graph_execute.cc | 34 +- src/ge/graph/execute/graph_execute.h | 17 +- src/ge/graph/label/while_label_maker.cc | 3 +- src/ge/graph/load/graph_loader.cc | 16 +- .../load/new_model_manager/cpu_queue_schedule.cc | 100 ++-- .../load/new_model_manager/cpu_queue_schedule.h | 3 +- src/ge/graph/load/new_model_manager/data_dumper.cc | 50 +- .../graph/load/new_model_manager/davinci_model.cc | 646 ++++++++++++--------- .../graph/load/new_model_manager/davinci_model.h | 64 +- .../graph/load/new_model_manager/model_manager.cc | 144 +++-- .../graph/load/new_model_manager/model_manager.h | 16 +- src/ge/graph/load/new_model_manager/model_utils.cc | 3 + .../task_info/end_graph_task_info.cc | 8 +- .../task_info/event_record_task_info.cc | 2 +- .../task_info/event_wait_task_info.cc | 4 +- .../task_info/fusion_start_task_info.cc | 2 +- .../task_info/fusion_stop_task_info.cc 
| 2 +- .../new_model_manager/task_info/hccl_task_info.cc | 31 +- .../task_info/kernel_ex_task_info.cc | 51 +- .../task_info/kernel_task_info.cc | 144 ++--- .../new_model_manager/task_info/kernel_task_info.h | 1 + .../task_info/label_goto_ex_task_info.cc | 2 +- .../task_info/label_set_task_info.cc | 2 +- .../task_info/label_switch_by_index_task_info.cc | 4 +- .../task_info/memcpy_addr_async_task_info.cc | 27 +- .../task_info/memcpy_addr_async_task_info.h | 4 +- .../task_info/memcpy_async_task_info.cc | 5 +- .../task_info/profiler_trace_task_info.cc | 2 +- .../task_info/stream_active_task_info.cc | 2 +- .../task_info/stream_switch_task_info.cc | 2 +- .../task_info/stream_switchn_task_info.cc | 10 +- .../task_info/super_kernel/super_kernel.cc | 10 +- .../task_info/super_kernel/super_kernel_factory.cc | 19 +- .../load/new_model_manager/zero_copy_offset.cc | 218 +++++++ .../load/new_model_manager/zero_copy_offset.h | 84 +++ .../graph/load/new_model_manager/zero_copy_task.cc | 27 +- .../graph/load/new_model_manager/zero_copy_task.h | 4 +- src/ge/graph/manager/block_memory.h | 43 ++ src/ge/graph/manager/graph_caching_allocator.cc | 14 +- src/ge/graph/manager/graph_manager.cc | 80 ++- src/ge/graph/manager/graph_manager.h | 3 + src/ge/graph/manager/host_mem_manager.cc | 86 +++ src/ge/graph/manager/host_mem_manager.h | 73 +++ src/ge/graph/manager/memory_api.cc | 45 ++ src/ge/graph/manager/rdma_pool_allocator.cc | 179 ++++++ src/ge/graph/manager/rdma_pool_allocator.h | 71 +++ src/ge/graph/manager/trans_var_data_utils.cc | 2 +- src/ge/graph/partition/dynamic_shape_partition.cc | 27 +- src/ge/graph/partition/dynamic_shape_partition.h | 2 + src/ge/graph/partition/graph_partition.cc | 10 + src/ge/graph/passes/atomic_addr_clean_pass.cc | 143 +++-- src/ge/graph/passes/atomic_addr_clean_pass.h | 10 + src/ge/graph/passes/attach_stream_label_pass.cc | 32 +- src/ge/graph/passes/attach_stream_label_pass.h | 8 - src/ge/graph/passes/base_pass.cc | 10 +- src/ge/graph/passes/base_pass.h | 6 
+- src/ge/graph/passes/bitcast_pass.cc | 148 +++++ src/ge/graph/passes/bitcast_pass.h | 41 ++ src/ge/graph/passes/cast_translate_pass.cc | 2 +- .../passes/end_of_sequence_add_control_pass.cc | 139 +++++ .../passes/end_of_sequence_add_control_pass.h | 56 ++ src/ge/graph/passes/folding_pass.cc | 2 +- .../input_output_connection_identify_pass.cc | 193 ++++++ .../passes/input_output_connection_identify_pass.h | 75 +++ .../graph/passes/mark_graph_unknown_status_pass.cc | 35 ++ .../graph/passes/mark_graph_unknown_status_pass.h | 28 + src/ge/graph/passes/mark_same_addr_pass.cc | 19 +- src/ge/graph/passes/memcpy_addr_async_pass.cc | 245 ++++++++ src/ge/graph/passes/memcpy_addr_async_pass.h | 51 ++ src/ge/graph/passes/merge_pass.cc | 2 +- src/ge/graph/passes/multi_batch_pass.cc | 135 ++++- src/ge/graph/passes/multi_batch_pass.h | 13 +- src/ge/graph/passes/net_output_pass.cc | 247 +++++--- src/ge/graph/passes/net_output_pass.h | 28 +- src/ge/graph/passes/next_iteration_pass.cc | 8 +- .../graph/passes/resource_pair_add_control_pass.cc | 16 +- .../passes/resource_pair_remove_control_pass.cc | 16 +- .../graph/passes/set_input_output_offset_pass.cc | 231 ++++++++ src/ge/graph/passes/set_input_output_offset_pass.h | 36 ++ .../graph/passes/switch_dead_branch_elimination.cc | 2 +- src/ge/graph/passes/switch_logic_remove_pass.cc | 2 +- .../passes/transop_symmetry_elimination_pass.cc | 132 ++++- .../passes/transop_symmetry_elimination_pass.h | 15 + src/ge/graph/passes/transpose_transdata_pass.cc | 2 +- src/ge/graph/passes/var_is_initialized_op_pass.cc | 2 +- src/ge/graph/passes/variable_ref_delete_op_pass.cc | 21 +- src/ge/graph/preprocess/graph_preprocess.cc | 510 +--------------- src/ge/graph/preprocess/graph_preprocess.h | 1 - src/ge/graph/preprocess/insert_op/ge_aipp_op.cc | 164 +++--- src/ge/graph/preprocess/multi_batch_copy_graph.cc | 103 +++- src/ge/graph/preprocess/multi_batch_copy_graph.h | 3 + .../host_aicpu_engine/common/constant/constant.h | 30 + 
.../host_aicpu_engine/engine/host_aicpu_engine.cc | 74 +++ .../host_aicpu_engine/engine/host_aicpu_engine.h | 111 ++++ src/ge/host_aicpu_engine/module.mk | 59 ++ .../ops_kernel_store/host_aicpu_ops_kernel_info.cc | 132 +++++ .../ops_kernel_store/host_aicpu_ops_kernel_info.h | 88 +++ .../ops_kernel_store/op/assign_op.cc | 51 ++ .../ops_kernel_store/op/assign_op.h | 41 ++ src/ge/host_aicpu_engine/ops_kernel_store/op/op.h | 48 ++ .../ops_kernel_store/op/op_factory.cc | 55 ++ .../ops_kernel_store/op/op_factory.h | 94 +++ .../ops_kernel_store/op/random_uniform_op.cc | 104 ++++ .../ops_kernel_store/op/random_uniform_op.h | 45 ++ .../ops_kernel_store/op/variable_op.cc | 46 ++ .../ops_kernel_store/op/variable_op.h | 41 ++ src/ge/host_kernels/rank_kernel.cc | 4 + src/ge/hybrid/executor/node_state.cc | 9 +- src/ge/hybrid/executor/subgraph_executor.cc | 4 +- .../executor/worker/shape_inference_engine.cc | 17 +- .../executor/worker/shape_inference_engine.h | 2 + src/ge/hybrid/model/graph_item.cc | 5 +- src/ge/hybrid/model/hybrid_model_builder.cc | 16 +- src/ge/hybrid/model/node_item.cc | 44 +- src/ge/hybrid/model/node_item.h | 3 + .../node_executor/aicore/aicore_node_executor.cc | 19 +- .../hybrid/node_executor/aicore/aicore_op_task.cc | 8 + .../hybrid/node_executor/aicore/aicore_op_task.h | 1 + .../node_executor/aicore/aicore_task_compiler.cc | 2 + .../compiledsubgraph/known_node_executor.cc | 23 +- .../node_executor/controlop/control_op_executor.cc | 48 +- .../node_executor/hccl/hccl_node_executor.cc | 2 +- .../hostaicpu/host_aicpu_node_executor.cc | 198 +++++++ .../hostaicpu/host_aicpu_node_executor.h | 82 +++ .../hostcpu/ge_local_node_executor.cc | 4 +- src/ge/ir_build/atc_ir_common.cc | 242 ++++---- src/ge/ir_build/atc_ir_common.h | 13 +- src/ge/ir_build/ge_ir_build.cc | 19 +- src/ge/offline/main.cc | 77 +-- src/ge/offline/module.mk | 2 +- src/ge/offline/single_op_parser.cc | 81 ++- src/ge/offline/single_op_parser.h | 3 + src/ge/opskernel_manager/ops_kernel_manager.cc | 2 
+- src/ge/session/omg.cc | 327 +---------- src/ge/single_op/single_op.cc | 83 +++ src/ge/single_op/single_op.h | 21 + src/ge/single_op/single_op_manager.cc | 115 ++-- src/ge/single_op/single_op_manager.h | 17 +- src/ge/single_op/single_op_model.cc | 90 ++- src/ge/single_op/single_op_model.h | 9 +- src/ge/single_op/stream_resource.cc | 41 +- src/ge/single_op/stream_resource.h | 13 +- src/ge/single_op/task/op_task.cc | 123 +++- src/ge/single_op/task/op_task.h | 34 +- src/ge/single_op/task/tbe_task_builder.cc | 68 ++- src/ge/single_op/task/tbe_task_builder.h | 6 +- tests/st/resnet50/common.cc | 0 .../ut/ge/graph/passes/flow_ctrl_pass_unittest.cc | 0 .../folding_kernel/expanddims_kernel_unittest.cc | 0 tests/ut/ge/graph/passes/merge_pass_unittest.cc | 0 .../ut/ge/graph/passes/net_output_pass_unittest.cc | 0 tests/ut/ge/graph/passes/snapshot_pass_unittest.cc | 0 .../ut/ge/single_op/single_op_manager_unittest.cc | 0 tests/ut/ge/single_op/single_op_model_unittest.cc | 0 third_party/fwkacllib/inc/cce/fwk_adpt_struct.h | 9 + .../fwkacllib/inc/ops/elewise_calculation_ops.h | 23 +- third_party/fwkacllib/inc/ops/image_ops.h | 45 +- third_party/fwkacllib/inc/ops/internal_ops.h | 15 +- .../fwkacllib/inc/ops/matrix_calculation_ops.h | 49 +- third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h | 89 --- third_party/fwkacllib/inc/ops/nn_calculation_ops.h | 257 ++++---- third_party/fwkacllib/inc/ops/nn_detect_ops.h | 184 ++++++ third_party/fwkacllib/inc/ops/nn_norm_ops.h | 28 +- third_party/fwkacllib/inc/ops/nn_pooling_ops.h | 30 +- third_party/fwkacllib/inc/ops/nn_training_ops.h | 10 - third_party/fwkacllib/inc/ops/reduce_ops.h | 12 +- third_party/fwkacllib/inc/ops/selection_ops.h | 39 +- third_party/fwkacllib/inc/ops/transformation_ops.h | 29 + third_party/fwkacllib/inc/register/op_tiling.h | 7 +- third_party/fwkacllib/inc/runtime/base.h | 12 + third_party/fwkacllib/inc/runtime/config.h | 1 + third_party/fwkacllib/inc/runtime/context.h | 46 +- third_party/fwkacllib/inc/runtime/dev.h | 
31 + third_party/fwkacllib/inc/runtime/mem.h | 29 +- 242 files changed, 9179 insertions(+), 3126 deletions(-) create mode 100644 inc/framework/memory/memory_api.h create mode 100644 inc/framework/omg/parser/model_parser.h create mode 100644 inc/framework/omg/parser/op_parser.h create mode 100644 inc/framework/omg/parser/parser_api.h create mode 100644 inc/framework/omg/parser/parser_factory.h create mode 100644 inc/framework/omg/parser/parser_inner_ctx.h create mode 100644 inc/framework/omg/parser/weights_parser.h create mode 100644 src/ge/common/model_parser/graph_parser_util.cc create mode 100644 src/ge/common/model_parser/graph_parser_util.h create mode 100644 src/ge/graph/load/new_model_manager/zero_copy_offset.cc create mode 100644 src/ge/graph/load/new_model_manager/zero_copy_offset.h create mode 100644 src/ge/graph/manager/block_memory.h create mode 100644 src/ge/graph/manager/host_mem_manager.cc create mode 100644 src/ge/graph/manager/host_mem_manager.h create mode 100644 src/ge/graph/manager/memory_api.cc create mode 100644 src/ge/graph/manager/rdma_pool_allocator.cc create mode 100644 src/ge/graph/manager/rdma_pool_allocator.h create mode 100644 src/ge/graph/passes/bitcast_pass.cc create mode 100644 src/ge/graph/passes/bitcast_pass.h create mode 100644 src/ge/graph/passes/end_of_sequence_add_control_pass.cc create mode 100644 src/ge/graph/passes/end_of_sequence_add_control_pass.h create mode 100644 src/ge/graph/passes/input_output_connection_identify_pass.cc create mode 100644 src/ge/graph/passes/input_output_connection_identify_pass.h create mode 100644 src/ge/graph/passes/mark_graph_unknown_status_pass.cc create mode 100644 src/ge/graph/passes/mark_graph_unknown_status_pass.h create mode 100644 src/ge/graph/passes/memcpy_addr_async_pass.cc create mode 100644 src/ge/graph/passes/memcpy_addr_async_pass.h create mode 100644 src/ge/graph/passes/set_input_output_offset_pass.cc create mode 100644 src/ge/graph/passes/set_input_output_offset_pass.h create mode 
100644 src/ge/host_aicpu_engine/common/constant/constant.h create mode 100644 src/ge/host_aicpu_engine/engine/host_aicpu_engine.cc create mode 100644 src/ge/host_aicpu_engine/engine/host_aicpu_engine.h create mode 100644 src/ge/host_aicpu_engine/module.mk create mode 100644 src/ge/host_aicpu_engine/ops_kernel_store/host_aicpu_ops_kernel_info.cc create mode 100644 src/ge/host_aicpu_engine/ops_kernel_store/host_aicpu_ops_kernel_info.h create mode 100644 src/ge/host_aicpu_engine/ops_kernel_store/op/assign_op.cc create mode 100644 src/ge/host_aicpu_engine/ops_kernel_store/op/assign_op.h create mode 100644 src/ge/host_aicpu_engine/ops_kernel_store/op/op.h create mode 100644 src/ge/host_aicpu_engine/ops_kernel_store/op/op_factory.cc create mode 100644 src/ge/host_aicpu_engine/ops_kernel_store/op/op_factory.h create mode 100644 src/ge/host_aicpu_engine/ops_kernel_store/op/random_uniform_op.cc create mode 100644 src/ge/host_aicpu_engine/ops_kernel_store/op/random_uniform_op.h create mode 100644 src/ge/host_aicpu_engine/ops_kernel_store/op/variable_op.cc create mode 100644 src/ge/host_aicpu_engine/ops_kernel_store/op/variable_op.h create mode 100644 src/ge/hybrid/node_executor/hostaicpu/host_aicpu_node_executor.cc create mode 100644 src/ge/hybrid/node_executor/hostaicpu/host_aicpu_node_executor.h mode change 100755 => 100644 tests/st/resnet50/common.cc mode change 100755 => 100644 tests/ut/ge/graph/passes/flow_ctrl_pass_unittest.cc mode change 100755 => 100644 tests/ut/ge/graph/passes/folding_kernel/expanddims_kernel_unittest.cc mode change 100755 => 100644 tests/ut/ge/graph/passes/merge_pass_unittest.cc mode change 100755 => 100644 tests/ut/ge/graph/passes/net_output_pass_unittest.cc mode change 100755 => 100644 tests/ut/ge/graph/passes/snapshot_pass_unittest.cc mode change 100755 => 100644 tests/ut/ge/single_op/single_op_manager_unittest.cc mode change 100755 => 100644 tests/ut/ge/single_op/single_op_model_unittest.cc diff --git a/RELEASE.md b/RELEASE.md index 
fb4bedc5..a5c32837 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -18,6 +18,7 @@ Thanks goes to these wonderful people: wangcong,weiyang,yanghaorang,xutianchun,shibeiji,zhouchao, tanghuikang, zhoulili, liujunzhu, zhengyuanhua, taoxiangdong Contributions of any kind are welcome! + # Release 0.5.0-beta ## Major Features and Improvements diff --git a/inc/common/opskernel/ge_task_info.h b/inc/common/opskernel/ge_task_info.h index 8a55b7de..9f3c409d 100644 --- a/inc/common/opskernel/ge_task_info.h +++ b/inc/common/opskernel/ge_task_info.h @@ -63,5 +63,12 @@ struct HcomOpertion { int32_t root; }; +struct HcomRemoteAccessAddrInfo { + uint32_t remotetRankID; + uint64_t remoteAddr; // host embedding table address + uint64_t localAddr; // device HBM address + uint64_t length; // memory Length in Bytes +}; + } // namespace ge #endif // INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_ diff --git a/inc/common/util/error_manager/error_manager.h b/inc/common/util/error_manager/error_manager.h index 76d5ce33..438e68a7 100644 --- a/inc/common/util/error_manager/error_manager.h +++ b/inc/common/util/error_manager/error_manager.h @@ -31,27 +31,37 @@ class ErrorManager { /// /// @brief init - /// @param [in] path current so path + /// @param [in] path: current so path /// @return int 0(success) -1(fail) /// int Init(std::string path); /// /// @brief Report error message - /// @param [in] errCode error code - /// @param [in] mapArgs parameter map + /// @param [in] error_code: error code + /// @param [in] args_map: parameter map /// @return int 0(success) -1(fail) /// int ReportErrMessage(std::string error_code, const std::map &args_map); + /// /// @brief output error message - /// @param [in] handle print handle + /// @param [in] handle: print handle /// @return int 0(success) -1(fail) /// int OutputErrMessage(int handle); + /// + /// @brief output message + /// @param [in] handle: print handle + /// @return int 0(success) -1(fail) + /// + int OutputMessage(int handle); + + /// /// @brief Report 
error message - /// @param [in] vector parameter key, vector parameter value + /// @param [in] key: vector parameter key + /// @param [in] value: vector parameter value /// void ATCReportErrMessage(std::string error_code, const std::vector &key = {}, const std::vector &value = {}); @@ -60,7 +70,7 @@ class ErrorManager { struct ErrorInfo { std::string error_id; std::string error_message; - std::vector arglist; + std::vector arg_list; }; ErrorManager() {} @@ -77,7 +87,8 @@ class ErrorManager { bool is_init_ = false; std::map error_map_; - std::vector error_message_evc_; + std::vector error_messages_; + std::vector warning_messages_; }; #endif // ERROR_MANAGER_H_ diff --git a/inc/common/util/platform_info.h b/inc/common/util/platform_info.h index 2a145d68..8d2a0579 100644 --- a/inc/common/util/platform_info.h +++ b/inc/common/util/platform_info.h @@ -82,6 +82,8 @@ class PlatformInfoManager { void ParseVectorCoreMemoryRates(map &vectorCoreMemoryRatesMap, PlatformInfo &platformInfoTemp); + void ParseCPUCache(map &CPUCacheMap, PlatformInfo &platformInfoTemp); + void ParseVectorCoreintrinsicDtypeMap(map &vectorCoreintrinsicDtypeMap, PlatformInfo &platformInfoTemp); diff --git a/inc/common/util/platform_info_def.h b/inc/common/util/platform_info_def.h index e840a8b9..c660e8f1 100644 --- a/inc/common/util/platform_info_def.h +++ b/inc/common/util/platform_info_def.h @@ -73,6 +73,8 @@ typedef struct tagAiCoreSpec { typedef struct tagAiCoreMemoryRates { double ddrRate; + double ddrReadRate; + double ddrWriteRate; double l2Rate; double l2ReadRate; double l2WriteRate; @@ -86,6 +88,7 @@ typedef struct tagAiCoreMemoryRates { } AiCoreMemoryRates; typedef struct tagVectorCoreSpec { + double vecFreq; uint64_t vecCalcSize; uint64_t smaskBuffer; uint64_t ubSize; @@ -94,10 +97,15 @@ typedef struct tagVectorCoreSpec { uint64_t ubbankNum; uint64_t ubburstInOneBlock; uint64_t ubbankGroupNum; + uint64_t vectorRegSize; + uint64_t predicateRegSize; + uint64_t addressRegSize; } 
VectorCoreSpec; typedef struct tagVectorCoreMemoryRates { double ddrRate; + double ddrReadRate; + double ddrWriteRate; double l2Rate; double l2ReadRate; double l2WriteRate; @@ -105,6 +113,11 @@ typedef struct tagVectorCoreMemoryRates { double ubToDdrRate; } VectorCoreMemoryRates; +typedef struct tagCPUCache { + uint32_t AICPUSyncBySW; + uint32_t TSCPUSyncBySW; +} CPUCache; + typedef struct tagPlatformInfo { StrInfo strInfo; SoCInfo socInfo; @@ -113,6 +126,7 @@ typedef struct tagPlatformInfo { map> aiCoreIntrinsicDtypeMap; VectorCoreSpec vectorCoreSpec; VectorCoreMemoryRates vectorCoreMemoryRates; + CPUCache cpucache; map> vectorCoreIntrinsicDtypeMap; } PlatformInfo; diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index cffb28bd..619812d7 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -46,7 +46,6 @@ const char *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep"; const char *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode"; const char *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug"; const char *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode"; -const char *const OPTION_EXEC_OP_DEBUG_LEVEL = "ge.exec.opDebugLevel"; const char *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild"; const char *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath"; const char *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses"; @@ -174,6 +173,9 @@ const char *const kDynamicBatchSize = "ge.dynamicBatchSize"; // configure whether to use dynamic image size const char *const kDynamicImageSize = "ge.dynamicImageSize"; +// Configure whether to use dynamic dims +const char *const kDynamicDims = "ge.dynamicDims"; + // Configure auto tune mode, this option only take effect while AUTO_TUNE_FLAG is Y, // example: GA|RL, support configure multiple, split by | const std::string AUTO_TUNE_MODE = "ge.autoTuneMode"; @@ -269,6 +271,7 @@ static const 
char *const INPUT_SHAPE = "input_shape"; static const char *const OP_NAME_MAP = "op_name_map"; static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; +static const char *const DYNAMIC_DIMS = kDynamicDims; static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; @@ -291,10 +294,11 @@ static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c // for interface: aclgrphBuildModel const std::set ir_builder_suppported_options = { - INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP, DYNAMIC_BATCH_SIZE, - DYNAMIC_IMAGE_SIZE, INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY, - AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES, INPUT_FP16_NODES, - LOG_LEVEL}; + INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP, + DYNAMIC_BATCH_SIZE, DYNAMIC_IMAGE_SIZE, DYNAMIC_DIMS, + INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY, + AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES, + INPUT_FP16_NODES, LOG_LEVEL}; // for interface: aclgrphBuildInitialize const std::set global_options = {CORE_TYPE, SOC_VERSION, diff --git a/inc/external/graph/operator_reg.h b/inc/external/graph/operator_reg.h index dfa21acf..d155f4bd 100644 --- a/inc/external/graph/operator_reg.h +++ b/inc/external/graph/operator_reg.h @@ -343,6 +343,7 @@ class OpReg { auto x_type = op.GetInputDesc(in_name).GetDataType(); \ TensorDesc op_output_desc = op.GetOutputDesc(out_name); \ op_output_desc.SetShape(ge::Shape(x_shape)); \ + op_output_desc.SetOriginShape(ge::Shape(x_shape)); \ op_output_desc.SetDataType(x_type); \ return op.UpdateOutputDesc(out_name, op_output_desc); \ } diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index f07a8fa0..dbf22ead 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -232,7 +232,7 
@@ rtError_t _rt_ret = (expr); \ if (_rt_ret != RT_ERROR_NONE) { \ DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ - return ge::RT_FAILED; \ + return RT_ERROR_TO_GE_STATUS(_rt_ret); \ } \ } while (0); diff --git a/inc/framework/common/ge_inner_error_codes.h b/inc/framework/common/ge_inner_error_codes.h index c4a36597..c9470c48 100644 --- a/inc/framework/common/ge_inner_error_codes.h +++ b/inc/framework/common/ge_inner_error_codes.h @@ -280,8 +280,25 @@ GE_ERRORNO_RUNTIME(GE_RTI_CALL_HCCL_REDUCE_SCATTER_FAILED, 47, "call hccl hcom r // Executor module error code definition GE_ERRORNO_EXECUTOR(GE_EXEC_NOT_INIT, 1, "GE Executor is not yet initialized."); -GE_ERRORNO_EXECUTOR(GE_AIPP_NOT_EXIST, 2, "GE AIPP is not exist."); -GE_ERRORNO_EXECUTOR(GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY, 3, "GE Dynamic AIPP is not support to query temporarily."); +GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_PATH_INVALID, 2, "Model file path is invalid."); +GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_KEY_PATH_INVALID, 3, "Key file path of model is invalid."); +GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_ID_INVALID, 4, "Model id is invalid."); +GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_DATA_SIZE_INVALID, 5, "Data size of model is invalid."); +GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_WEIGHT_SIZE_INVALID, 6, "Weight size of model is invalid."); +GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_PARTITION_NUM_INVALID, 7, "Partition number of model is invalid."); +GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_QUEUE_ID_INVALID, 8, "Queue id of model is invalid."); +GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION, 9, "Model does not support encryption."); +GE_ERRORNO_EXECUTOR(GE_EXEC_READ_MODEL_FILE_FAILED, 10, "Failed to read model file."); +GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_MODEL_REPEATED, 11, "The model is loaded repeatedly."); +GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_MODEL_PARTITION_FAILED, 12, "Failed to load model partition."); +GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED, 13, "Failed to load weight partition."); 
+GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_TASK_PARTITION_FAILED, 14, "Failed to load task partition."); +GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_KERNEL_PARTITION_FAILED, 15, "Failed to load kernel partition."); +GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, 16, "Failed to allocate feature map memory."); +GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, 17, "Failed to allocate weight memory."); +GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_VAR_MEM_FAILED, 18, "Failed to allocate variable memory."); +GE_ERRORNO_EXECUTOR(GE_AIPP_NOT_EXIST, 19, "GE AIPP is not exist."); +GE_ERRORNO_EXECUTOR(GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY, 20, "GE Dynamic AIPP is not support to query temporarily."); // Generator module error code definition GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_INIT_FAILED, 1, "Graph manager initialize failed."); @@ -289,6 +306,8 @@ GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, 2, "Graph mana GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, 3, "Graph manager build graph failed."); GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed."); GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed."); + +#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast(RT_ERROR) } // namespace ge #endif // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index 50e41755..db692c36 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -339,6 +339,7 @@ REGISTER_OPTYPE_DECLARE(GETNEXT, "GetNext"); REGISTER_OPTYPE_DECLARE(INITDATA, "InitData"); REGISTER_OPTYPE_DECLARE(TRANSSHAPE, "TransShape") REGISTER_OPTYPE_DECLARE(REFIDENTITY, "RefIdentity"); +REGISTER_OPTYPE_DECLARE(BITCAST, "Bitcast"); // ANN dedicated operator REGISTER_OPTYPE_DECLARE(ANN_MEAN, "AnnMean"); @@ -432,6 +433,8 @@ REGISTER_OPTYPE_DECLARE(HCOMALLREDUCE, "HcomAllReduce"); 
REGISTER_OPTYPE_DECLARE(HCOMREDUCESCATTER, "HcomReduceScatter"); REGISTER_OPTYPE_DECLARE(HCOMSEND, "HcomSend"); REGISTER_OPTYPE_DECLARE(HCOMRECEIVE, "HcomReceive"); +REGISTER_OPTYPE_DECLARE(HCOMREMOTEREAD, "HcomRemoteRead"); +REGISTER_OPTYPE_DECLARE(HCOMREMOTEWRITE, "HcomRemoteWrite"); REGISTER_OPTYPE_DECLARE(VARASSIGN, "VarAssign"); REGISTER_OPTYPE_DECLARE(VARISINITIALIZEDOP, "VarIsInitializedOp"); @@ -559,6 +562,16 @@ enum ModelCheckType { }; /// +/// @brief dynamic input type +/// +enum DynamicInputType { + FIXED = 0, // default mode + DYNAMIC_BATCH = 1, + DYNAMIC_IMAGE = 2, + DYNAMIC_DIMS = 3 +}; + +/// /// @brief magic number of the model file /// FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_FILE_MAGIC_NUM; diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 2b7335ef..129b8613 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -26,23 +26,26 @@ #include "common/ge_types.h" #include "common/types.h" #include "graph/tensor.h" +#include "graph/ge_tensor.h" #include "runtime/base.h" namespace ge { class ModelListenerAdapter; class SingleOp; +class DynamicSingleOp; struct RunModelData { uint32_t index; // Data index uint32_t modelId; - std::vector blobs; // All input/output data buffer - uint32_t timestamp; // Data creation time - uint32_t timeout; // Processing timeout - uint64_t request_id = 0; // Request ID - uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 - uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 - uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 + std::vector blobs; // All input/output data buffer + uint32_t timestamp; // Data creation time + uint32_t timeout; // Processing timeout + uint64_t request_id = 0; // Request ID + uint64_t dynamic_batch_size = 0; // 
Dynamic batch size scene, set dynamic size, not supported by default:0 + uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 + uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 + std::vector dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty }; class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { @@ -87,16 +90,52 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { /// ge::Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height, uint64_t image_width); + + /// + /// @ingroup ge + /// @brief Set dynamic dims info + /// @param [in] model_id: model id allocate from manager + /// @param [in] dynamic_input_addr: dynamic input addr created by user + /// @param [in] length: length of dynamic input addr + /// @param [in] dynamic_dim_num: number of dynamic dimension + /// @param [in] dynamic_dims: array of dynamic dimensions + /// @return execute result + /// + ge::Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length, + const std::vector &dynamic_dims); + + /// + /// @ingroup ge + /// @brief Get current dynamic dims info by combined dims + /// @param [in] model_id: model id allocate from manager + /// @param [in] combined_dims: array of combined dimensions + /// @param [out] cur_dynamic_dims: current dynamic dims + /// @return execute result + /// + ge::Status GetCurDynamicDims(uint32_t model_id, const std::vector &combined_dims, + std::vector &cur_dynamic_dims); + /// /// @ingroup ge /// @brief Get dynamic batch_info /// @param [in] model_id /// @param [out] batch_info + /// @param [out] dynamic_type + /// @return execute result + /// + ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector> &batch_info, + int32_t &dynamic_type); + + /// + /// @ingroup ge + /// @brief Get combined dynamic dims info + /// 
@param [in] model_id + /// @param [out] batch_info /// @return execute result /// - ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector> &batch_info); + ge::Status GetCombinedDynamicDims(uint32_t model_id, std::vector> &batch_info); - ge::Status GetCurShape(const uint32_t model_id, std::vector &batch_info); + ge::Status GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type); /// /// @ingroup ge @@ -209,6 +248,13 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { static ge::Status ExecuteAsync(SingleOp *executor, const std::vector &inputs, std::vector &outputs); + static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, + DynamicSingleOp **single_op); + + static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector &input_desc, + const std::vector &inputs, std::vector &output_desc, + std::vector &outputs); + static ge::Status ReleaseSingleOpResource(void *stream); ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); diff --git a/inc/framework/memory/memory_api.h b/inc/framework/memory/memory_api.h new file mode 100644 index 00000000..656e4710 --- /dev/null +++ b/inc/framework/memory/memory_api.h @@ -0,0 +1,56 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_MEMORY_MEMORY_API_H_ +#define INC_FRAMEWORK_MEMORY_MEMORY_API_H_ + +#include +#include + +#include "ge/ge_api_error_codes.h" +#include "runtime/mem.h" + +namespace ge { +enum MemStorageType { + HBM = 0, + RDMA_HBM, +}; + +struct HostVarInfo { + uint64_t base_addr; + uint64_t var_size; +}; + +/// +/// \param size [in] rdma pool memory size to be allocated. +/// \param mem_type [in] memory type for rdma pool. +/// \return Status result of function +Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_MEMORY_HBM); + +/// +/// \param var_info [in] host variable addr infos. +/// \param mem_type [in] memory type for rdma pool. +/// \return Status result of function +Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t mem_type = RT_MEMORY_HBM); + +/// +/// \param var_name [in] var_name name of host variable. +/// \param base_addr [out] base_addr vase addr of host variable. +/// \param var_size [out] var_size memory_size of host variable. +/// \return Status result of function +Status GetVarBaseAddrAndSize(const std::string &var_name, uint64_t &base_addr, uint64_t &var_size); +} // namespace ge +#endif // INC_FRAMEWORK_MEMORY_MEMORY_API_H_ diff --git a/inc/framework/omg/omg.h b/inc/framework/omg/omg.h index 45a8896d..6a120439 100644 --- a/inc/framework/omg/omg.h +++ b/inc/framework/omg/omg.h @@ -96,10 +96,6 @@ Status CheckCustomAiCpuOpLib(); Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); -Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); - -Status GetOutputLeaf(ge::NodePtr node, std::vector> &output_nodes_info); - void GetOutputNodesNameAndIndex(std::vector> &output_nodes_info, std::vector &output_nodes_name); diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index 70d59c2f..80361232 100644 --- a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -120,6 +120,7 @@ struct 
OmgContext { bool is_dynamic_input = false; std::string dynamic_batch_size; std::string dynamic_image_size; + std::string dynamic_dims; }; } // namespace ge diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h new file mode 100644 index 00000000..b7e77dd4 --- /dev/null +++ b/inc/framework/omg/parser/model_parser.h @@ -0,0 +1,110 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_OMG_PARSER_MODEL_PARSER_H_ +#define INC_FRAMEWORK_OMG_PARSER_MODEL_PARSER_H_ + +#include +#include "framework/common/types.h" +#include "framework/omg/omg_inner_types.h" +#include "graph/attr_value.h" +#include "graph/compute_graph.h" +#include "graph/ge_tensor.h" +#include "graph/graph.h" +#include "graph/op_desc.h" +#include "graph/operator.h" +#include "graph/range_vistor.h" +#include "graph/utils/attr_utils.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" + +using Status = domi::Status; + +namespace domi { +using GetGraphCallback = std::function( + const google::protobuf::Message *root_proto, const std::string &graph)>; +class ModelParser { + public: + ModelParser() {} + + virtual ~ModelParser() {} + + /** + * @ingroup domi_omg + * @brief Analyze network model data + * @param [in] file Network model file path + * @param [in|out] graph Save the network information after analysis + * 
@return SUCCESS + * @return Others failed + */ + virtual Status Parse(const char *file, ge::Graph &graph) = 0; + + /** + * @ingroup domi_omg + * @brief Parse relevant data from memory and save it to graph + * @param [in] input Model file memory data + * @param [in|out] graph A graph for saving the model information after analysis + * @return SUCCESS + * @return FAILED + * @author + */ + virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; + + /** + * @ingroup domi_omg + * @brief Analyze network model data + * @param [in] proto network model + * @param [in|out] graph Save the network information after analysis + * @return SUCCESS + * @return Others failed + */ + virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0; + + /** + * @ingroup domi_omg + * @brief Analyze callback model data in subgraph + * @param [in] proto network model + * @param [in] callback callback of subgraph + * @param [in|out] graph Save the network information after analysis + * @return SUCCESS + * @return Others failed + */ + virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback, + ge::ComputeGraphPtr &graph) = 0; + /** + * @ingroup domi_omg + * @brief Convert model files to JSON format + * @param [in] model_file Model file path to be converted + * @param [out] json_file Converted JSON file path + * @return SUCCESS + * @return Others failed + */ + virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; } + + /* + * @ingroup domi_omg + * @brief Convert network data type + * @param [in] type Data type to be converted + * @return ge::DataType + */ + virtual ge::DataType ConvertToGeDataType(const uint32_t type) = 0; + + virtual Status ParseAllGraph(const google::protobuf::Message *root_proto, ge::ComputeGraphPtr &root_graph) = 0; +}; +} // namespace domi + +#endif // INC_FRAMEWORK_OMG_PARSER_MODEL_PARSER_H_ diff --git 
a/inc/framework/omg/parser/op_parser.h b/inc/framework/omg/parser/op_parser.h new file mode 100644 index 00000000..9f1c4be0 --- /dev/null +++ b/inc/framework/omg/parser/op_parser.h @@ -0,0 +1,92 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_FRAMEWORK_OMG_PARSER_OP_PARSER_H_ +#define INC_FRAMEWORK_OMG_PARSER_OP_PARSER_H_ + +#include +#include "common/types.h" +#include "omg/omg_inner_types.h" +#include "proto/om.pb.h" +#include "graph/ge_tensor.h" +#include "graph/op_desc.h" +#include "graph/utils/op_desc_utils.h" + +using google::protobuf::Message; +using Status = domi::Status; + +namespace ge { +/** + * @ingroup domi_omg + * @brief Used to analyze operator information + * + */ +class OpParser { + public: + /** + * @ingroup domi_omg + * @brief Deconstructor + */ + virtual ~OpParser() {} + + /** + * @ingroup domi_omg + * @brief Analytic operator parameters + * @param [in] op_src Parameter data to be resolved + * @param [out] graph Parsed parameter data + * @return SUCCESS + * @return FAILED + */ + virtual Status ParseParams(const Message *op_src, ge::OpDescPtr &op_desc) = 0; + + /** + * @ingroup domi_omg + * @brief Analytic operator parameters + * @param [in] op_src Parameter data to be resolved + * @param [out] Operator parameter data + * @return SUCCESS + * @return FAILED + */ + virtual Status ParseParams(const Message *op_src, ge::Operator &op_dest) = 0; + + /** + * 
@ingroup domi_omg + * @brief Analytic operator weight information + * @param [in] op_src Weight data to be resolved + * @param [out] op_dest Weight data after analysis + * @return SUCCESS + * @return FAILED + */ + virtual Status ParseWeights(const Message *op_src, ge::NodePtr &node) = 0; + + /** + * @ingroup domi_omg + * @brief Get the format information according to the parameters in the operator + * @param [in] op_src Parameter data to be resolved + * @param [out] format Output the parsed format + * @return SUCCESS + * @return FAILED + */ + virtual Status GetFormat(const Message *op_src, domi::domiTensorFormat_t &format) { + (void)op_src; + // Indicates that the op does not provide a value for format + format = domi::DOMI_TENSOR_RESERVED; + return domi::SUCCESS; + } +}; +} // namespace ge + +#endif // INC_FRAMEWORK_OMG_PARSER_OP_PARSER_H_ diff --git a/inc/framework/omg/parser/parser_api.h b/inc/framework/omg/parser/parser_api.h new file mode 100644 index 00000000..5bfaef4f --- /dev/null +++ b/inc/framework/omg/parser/parser_api.h @@ -0,0 +1,31 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ +#define INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ + +#include +#include +#include +#include "ge/ge_api_error_codes.h" + +namespace ge { +// Initialize parser +Status ParserInitialize(const std::map& options); +// Finalize parser, release all resources +Status ParserFinalize(); +} // namespace ge +#endif // INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ diff --git a/inc/framework/omg/parser/parser_factory.h b/inc/framework/omg/parser/parser_factory.h new file mode 100644 index 00000000..2f370c66 --- /dev/null +++ b/inc/framework/omg/parser/parser_factory.h @@ -0,0 +1,138 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_ +#define INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_ + +#include +#include +#include +#include +#include "framework/common/types.h" +#include "framework/omg/omg_inner_types.h" + +using Status = domi::Status; + +namespace domi { +class WeightsParser; +class ModelParser; + +typedef std::shared_ptr (*MODEL_PARSER_CREATOR_FUN)(void); + +// Create modelparser for different frameworks +class ModelParserFactory { + public: + static ModelParserFactory *Instance(); + + /** + * @ingroup domi_omg + * @brief Create a modelparser based on the type entered + * @param [in] type Framework type + * @return Created modelparser + */ + std::shared_ptr CreateModelParser(const domi::FrameworkType type); + + /** + * @ingroup domi_omg + * @brief Register create function + * @param [in] type Framework type + * @param [in] fun ModelParser's create function + */ + void RegisterCreator(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN fun); + + protected: + ModelParserFactory() {} + ~ModelParserFactory(); + + private: + std::map creator_map_; +}; // end class ModelParserFactory + +class ModelParserRegisterar { + public: + ModelParserRegisterar(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN fun) { + ModelParserFactory::Instance()->RegisterCreator(type, fun); + } + ~ModelParserRegisterar() {} +}; + +// Registration macros for model parsers +#define REGISTER_MODEL_PARSER_CREATOR(type, clazz) \ + std::shared_ptr Creator_##type##_Model_Parser() { \ + std::shared_ptr ptr = nullptr; \ + try { \ + ptr = make_shared(); \ + } catch (...) 
{ \ + ptr = nullptr; \ + } \ + return std::shared_ptr(ptr); \ + } \ + ModelParserRegisterar g_##type##_Model_Parser_Creator(type, Creator_##type##_Model_Parser) + +typedef std::shared_ptr (*WEIGHTS_PARSER_CREATOR_FUN)(void); + +// Create weightsparser for different frameworks +class WeightsParserFactory { + public: + static WeightsParserFactory *Instance(); + + /** + * @ingroup domi_omg + * @brief Create weightsparser based on the type entered + * @param [in] type Framework type + * @return Created weightsparser + */ + std::shared_ptr CreateWeightsParser(const domi::FrameworkType type); + + /** + * @ingroup domi_omg + * @brief Register create function + * @param [in] type Framework type + * @param [in] fun WeightsParser's create function + */ + void RegisterCreator(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN fun); + + protected: + WeightsParserFactory() {} + ~WeightsParserFactory(); + + private: + std::map creator_map_; +}; // end class WeightsParserFactory + +class WeightsParserRegisterar { + public: + WeightsParserRegisterar(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN fun) { + WeightsParserFactory::Instance()->RegisterCreator(type, fun); + } + ~WeightsParserRegisterar() {} +}; + +// Register macro of weight resolver +#define REGISTER_WEIGHTS_PARSER_CREATOR(type, clazz) \ + std::shared_ptr Creator_##type##_Weights_Parser() { \ + std::shared_ptr ptr = nullptr; \ + try { \ + ptr = make_shared(); \ + } catch (...) 
{ \ + ptr = nullptr; \ + } \ + return std::shared_ptr(ptr); \ + } \ + WeightsParserRegisterar g_##type##_Weights_Parser_Creator(type, Creator_##type##_Weights_Parser) +}; // namespace domi + +#endif // INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_ diff --git a/inc/framework/omg/parser/parser_inner_ctx.h b/inc/framework/omg/parser/parser_inner_ctx.h new file mode 100644 index 00000000..4ea84569 --- /dev/null +++ b/inc/framework/omg/parser/parser_inner_ctx.h @@ -0,0 +1,43 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_ +#define INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_ + +#include +#include +#include +#include +#include +#include +#include "external/register/register_fmk_types.h" +#include "external/register/register_types.h" +#include "framework/omg/omg_inner_types.h" + +namespace ge { +struct ParserContext { + std::unordered_map> input_dims; + domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND; + ; + RunMode run_mode = ONLY_PRE_CHECK; + std::string custom_proto_path; // save caffe custom proto path, used by caffe parse + std::string caffe_proto_path; // save caffe proto path, used by caffe parse +}; + +ParserContext &GetParserContext(); +} // namespace ge + +#endif // INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_ diff --git a/inc/framework/omg/parser/weights_parser.h b/inc/framework/omg/parser/weights_parser.h new file mode 100644 index 00000000..f1f2adab --- /dev/null +++ b/inc/framework/omg/parser/weights_parser.h @@ -0,0 +1,74 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_ +#define INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_ + +#include "graph/graph.h" +#include "graph/attr_value.h" +#include "graph/compute_graph.h" +#include "graph/ge_tensor.h" +#include "graph/op_desc.h" +#include "graph/operator.h" +#include "graph/range_vistor.h" +#include "graph/utils/attr_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" + +namespace domi { +/** + * @ingroup domi_omg + * @brief Weight information resolver + * + */ +class WeightsParser { + public: + /** + * @ingroup domi_omg + * @brief Constructor + */ + WeightsParser() {} + + /** + * @ingroup domi_omg + * @brief Deconstructor + */ + virtual ~WeightsParser() {} + + /** + * @ingroup domi_omg + * @brief Analyze weight data + * @param [in] file Path of weight file after training + * @param [in|out] graph Graph for saving weight information after analysis + * @return SUCCESS + * @return Others failed + */ + virtual Status Parse(const char *file, ge::Graph &graph) = 0; + + /** + * @ingroup domi_omg + * @brief Parse relevant data from memory and save it to graph + * @param [in] input Model file memory data + * @param [in|out] graph A graph for saving the model information after analysis + * @return SUCCESS + * @return FAILED + * @author + */ + virtual Status ParseFromMemory(const char *input, uint32_t lengt, ge::ComputeGraphPtr &graph) = 0; +}; +} // namespace domi + +#endif // INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_ diff --git a/inc/graph/compute_graph.h b/inc/graph/compute_graph.h index 1cb65a6c..f6bc9e41 100644 --- a/inc/graph/compute_graph.h +++ b/inc/graph/compute_graph.h @@ -87,11 +87,14 @@ class ComputeGraph : public std::enable_shared_from_this, public A // AddNode with NodePtr NodePtr AddNode(NodePtr node); NodePtr AddNode(OpDescPtr op); - NodePtr AddNode(OpDescPtr op, int64_t id); // for unserialize. 
+ NodePtr AddNode(OpDescPtr op, int64_t id); // for unserialize NodePtr AddNodeFront(NodePtr node); NodePtr AddNodeFront(const OpDescPtr &op); NodePtr AddInputNode(NodePtr node); NodePtr AddOutputNode(NodePtr node); + // insert node with specific pre_node + NodePtr AddNodeAfter(OpDescPtr &op, const NodePtr &pre_node); + NodePtr AddNodeAfter(NodePtr node, const NodePtr &pre_node); graphStatus RemoveNode(const NodePtr &node); graphStatus RemoveInputNode(const NodePtr &node); diff --git a/inc/graph/debug/ge_attr_define.h b/inc/graph/debug/ge_attr_define.h index ff015be1..f79a8035 100644 --- a/inc/graph/debug/ge_attr_define.h +++ b/inc/graph/debug/ge_attr_define.h @@ -185,6 +185,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_ORIGIN_SIZE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NODE_CONNECT_INPUT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NODE_CONNECT_OUTPUT; + // to be deleted GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_TO_BE_DELETED; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_RESHAPE_FUSION; @@ -934,12 +937,14 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PRED_VALUE; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_NUM; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_LABEL; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_COMBINED_BATCH; // Control flow GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_SWITCH_COND; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ACTIVE_STREAM_LIST; 
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCHN_PRED_VALUE; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SUBGRAPH_FIRST_ACTIVE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_COMBINED_DYNAMIC_DIMS; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_BRANCH_NODE_LABEL; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG; @@ -983,6 +988,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NEE // For mutil-batch GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INSERT_BY_MBATCH; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MBATCH_ORIGIN_INPUT_DIMS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_TYPE; // For inserted op GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INSERTED_BY_GE; @@ -1022,6 +1028,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_ADDR; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_VALID_SIZE; +// for unregistered op +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_UNREGST_OPPATH; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_UNREGST_ATTRLIST; + // op overflow dump GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_FLAG; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_MODE; @@ -1075,8 +1085,25 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR; 
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX; +// atc user def dtype&format +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ATC_USER_DEFINE_DATATYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ATC_USER_DEFINE_FORMAT; + // for fusion op plugin GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSIONOP_ORIGINAL_TYPE; + +// graph partition for aicpu +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PLD_FRONT_NODE_ENGINE_NAME; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_END_REAR_NODE_ENGINE_NAME; + +// input and output memory type +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_VARIABLE_PLACEMENT; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INPUT_MEMORY_TYPE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OUTPUT_MEMORY_TYPE; + +// input_output_offset +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ZERO_COPY_BASIC_OFFSET; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ZERO_COPY_RELATIVE_OFFSET; } // namespace ge #endif // INC_GRAPH_DEBUG_GE_ATTR_DEFINE_H_ diff --git a/inc/graph/detail/model_serialize_imp.h b/inc/graph/detail/model_serialize_imp.h index b8b3916a..ff27335a 100644 --- a/inc/graph/detail/model_serialize_imp.h +++ b/inc/graph/detail/model_serialize_imp.h @@ -67,6 +67,9 @@ class ModelSerializeImp { bool HandleNodeNameRef(); bool UnserializeOpDesc(OpDescPtr &opDesc, proto::OpDef &opDefProto); + void AttrDefToOpDesc(OpDescPtr &op_desc, std::vector &key_in, std::vector &key_out, + std::vector &value_in, std::vector &value_out, std::vector &opt); + void OpDescToAttrDef(const ConstOpDescPtr &op_desc, proto::OpDef *op_def_proto); bool UnserializeNode(ComputeGraphPtr &graph, 
proto::OpDef &opDefProto); diff --git a/inc/graph/op_desc.h b/inc/graph/op_desc.h index 1bba7340..27c91efc 100644 --- a/inc/graph/op_desc.h +++ b/inc/graph/op_desc.h @@ -159,10 +159,6 @@ class OpDesc : public std::enable_shared_from_this, public AttrHolder { std::map GetAllInputName() const; - void SetAllInputName(const std::map &input_name_idx); - - std::vector GetAllOptionalInputName() const; - std::map GetAllOutputName(); bool UpdateInputName(std::map inputNameIdx); @@ -300,6 +296,8 @@ class OpDesc : public std::enable_shared_from_this, public AttrHolder { std::map subgraph_ir_names_to_type_; vector inputs_desc_{}; + map input_name_idx_{}; + std::unordered_set optional_input_names_{}; vector outputs_desc_{}; map output_name_idx_{}; std::function infer_func_ = nullptr; diff --git a/src/common/graph/compute_graph.cc b/src/common/graph/compute_graph.cc index 8a0c9f06..ded69b41 100644 --- a/src/common/graph/compute_graph.cc +++ b/src/common/graph/compute_graph.cc @@ -62,18 +62,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY string ComputeGraph::GetName() co GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY void ComputeGraph::SetName(const string &name) { name_ = name; } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY size_t ComputeGraph::GetAllNodesSize() const { - size_t s = nodes_.size(); - for (const auto &sub_graph : sub_graph_) { - s += sub_graph->GetAllNodesSize(); - } - return s; + return GetAllNodes().size(); } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ComputeGraph::Vistor ComputeGraph::GetAllNodes() const { - if (sub_graph_.empty()) { - return Vistor(shared_from_this(), nodes_); - } - std::vector> subgraphs; return AllGraphNodes(subgraphs); } @@ -277,7 +269,7 @@ NodePtr ComputeGraph::AddNodeFront(NodePtr node) { NodePtr ComputeGraph::AddNodeFront(const OpDescPtr &op) { if (op == nullptr) { - GELOGE(GRAPH_FAILED, "The OpDesc ptr should be not null."); + GELOGE(GRAPH_FAILED, "The OpDesc ptr should not be null."); return nullptr; } 
op->SetId(nodes_.size()); @@ -287,9 +279,38 @@ NodePtr ComputeGraph::AddNodeFront(const OpDescPtr &op) { return AddNodeFront(node_ptr); } +NodePtr ComputeGraph::AddNodeAfter(NodePtr node, const NodePtr &pre_node) { + if (node == nullptr || node->GetOpDesc() == nullptr || pre_node == nullptr) { + GELOGE(GRAPH_FAILED, "The node ptr or op desc should not be null."); + return nullptr; + } + node->GetOpDesc()->SetId(nodes_.size()); + auto node_iter = std::find(nodes_.begin(), nodes_.end(), pre_node); + if (node_iter != nodes_.end()) { + nodes_.insert(node_iter + 1, node); + } else { + GELOGE(GRAPH_FAILED, "Cannot find pre_node in nodes_."); + return nullptr; + } + + return node; +} + +NodePtr ComputeGraph::AddNodeAfter(OpDescPtr &op, const NodePtr &pre_node) { + if (op == nullptr) { + GELOGE(GRAPH_FAILED, "The OpDesc ptr should not be null."); + return nullptr; + } + op->SetId(nodes_.size()); + NodePtr node_ptr = shared_ptr(new (std::nothrow) Node(op, shared_from_this())); + GE_IF_BOOL_EXEC(node_ptr == nullptr, GELOGE(GRAPH_FAILED, "node_ptr is NULL!!!"); return nullptr); + GE_IF_BOOL_EXEC(node_ptr->Init() != GRAPH_SUCCESS, GELOGE(GRAPH_FAILED, "node init failed."); return nullptr); + return AddNodeAfter(node_ptr, pre_node); +} + GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(NodePtr node) { if (node == nullptr || node->GetOpDesc() == nullptr) { - GELOGE(GRAPH_FAILED, "The node ptr should be not null."); + GELOGE(GRAPH_FAILED, "The node ptr should not be null."); return nullptr; } node->GetOpDesc()->SetId((int64_t)GetDirectNodesSize()); @@ -299,7 +320,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(Nod GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(OpDescPtr op) { if (op == nullptr) { - GELOGE(GRAPH_FAILED, "The OpDesc ptr should be not null."); + GELOGE(GRAPH_FAILED, "The OpDesc ptr should not be null."); return nullptr; } op->SetId(GetDirectNodesSize()); @@ -311,7 +332,7 @@ 
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(OpD NodePtr ComputeGraph::AddNode(OpDescPtr op, int64_t id) { // for unserialize. if (op == nullptr) { - GELOGE(GRAPH_FAILED, "The OpDesc ptr should be not null."); + GELOGE(GRAPH_FAILED, "The OpDesc ptr should not be null."); return nullptr; } op->SetId(id); @@ -324,7 +345,7 @@ NodePtr ComputeGraph::AddNode(OpDescPtr op, int64_t id) { // for unserialize. NodePtr ComputeGraph::AddInputNode(NodePtr node) { if (node == nullptr) { - GELOGE(GRAPH_FAILED, "The node ptr should be not null."); + GELOGE(GRAPH_FAILED, "The node ptr should not be null."); return nullptr; } input_nodes_.push_back(node); @@ -336,7 +357,7 @@ NodePtr ComputeGraph::AddInputNode(NodePtr node) { NodePtr ComputeGraph::AddOutputNode(NodePtr node) { if (node == nullptr || node->GetOpDesc() == nullptr) { - GELOGE(GRAPH_FAILED, "The node ptr or opdesc should be not null."); + GELOGE(GRAPH_FAILED, "The node ptr or opdesc should not be null."); return nullptr; } @@ -372,7 +393,7 @@ graphStatus ComputeGraph::RemoveConstInput(const NodePtr &node) { if (out_anchor->GetOwnerNode()->GetType() == CONSTANT || out_anchor->GetOwnerNode()->GetType() == CONSTANTOP) { GE_CHK_BOOL_RET_STATUS(GraphUtils::RemoveEdge(out_anchor, in_anchor) == GRAPH_SUCCESS, GRAPH_FAILED, "Remove edge from const op failed."); - if (out_anchor->GetOwnerNode()->GetOutDataNodes().size() == 0) { + if (out_anchor->GetOwnerNode()->GetOutNodes().size() == 0) { GELOGI("Remove const op %s.", out_anchor->GetOwnerNode()->GetName().c_str()); auto iter = find(nodes_.begin(), nodes_.end(), out_anchor->GetOwnerNode()); if (iter != nodes_.end()) { @@ -386,7 +407,7 @@ graphStatus ComputeGraph::RemoveConstInput(const NodePtr &node) { GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::RemoveNode(const NodePtr &node) { if (node == nullptr) { - GELOGE(GRAPH_FAILED, "The node ptr should be not null."); + GELOGE(GRAPH_FAILED, "The node ptr should not be null."); 
return GRAPH_FAILED; } @@ -415,7 +436,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::RemoveN // Used in sub_graph scenes graphStatus ComputeGraph::RemoveInputNode(const NodePtr &node) { if (node == nullptr) { - GELOGE(GRAPH_FAILED, "The node ptr should be not null."); + GELOGE(GRAPH_FAILED, "The node ptr should not be null."); return GRAPH_FAILED; } @@ -430,7 +451,7 @@ graphStatus ComputeGraph::RemoveInputNode(const NodePtr &node) { // Used in sub_graph scenes graphStatus ComputeGraph::RemoveOutputNode(const NodePtr &node) { if (node == nullptr) { - GELOGE(GRAPH_FAILED, "The node ptr should be not null."); + GELOGE(GRAPH_FAILED, "The node ptr should not be null."); return GRAPH_FAILED; } @@ -451,7 +472,7 @@ graphStatus ComputeGraph::RemoveOutputNode(const NodePtr &node) { std::shared_ptr ComputeGraph::AddSubGraph(std::shared_ptr sub_graph) { if (sub_graph == nullptr) { - GELOGE(GRAPH_FAILED, "The graph ptr should be not null."); + GELOGE(GRAPH_FAILED, "The graph ptr should not be null."); return nullptr; } sub_graph_.push_back(sub_graph); @@ -461,7 +482,7 @@ std::shared_ptr ComputeGraph::AddSubGraph(std::shared_ptr &sub_graph) { if (sub_graph == nullptr) { - GELOGE(GRAPH_FAILED, "The graph ptr should be not null."); + GELOGE(GRAPH_FAILED, "The graph ptr should not be null."); return GRAPH_FAILED; } @@ -500,8 +521,7 @@ ComputeGraph::AddSubgraph(const std::string &name, const std::shared_ptrparent_graph_.expired()) { - GE_LOGE("The subgraphs can only be added to the root graph"); - return GRAPH_PARAM_INVALID; + GELOGW("The subgraphs should only be added to the root graph"); } if (name != subgraph->GetName()) { GELOGW("The subgraph name %s is different with input %s", subgraph->GetName().c_str(), name.c_str()); @@ -653,7 +673,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::InsertE GELOGW("node or OpDescPtr is nullptr."); continue; } - GE_IF_BOOL_EXEC(node == nullptr, GELOGE(GRAPH_FAILED, "The node should be 
not null."); return GRAPH_FAILED); + GE_IF_BOOL_EXEC(node == nullptr, GELOGE(GRAPH_FAILED, "The node should not be null."); return GRAPH_FAILED); if (node->GetOpDesc()->GetType() == RECV) { auto iter = find(node_vec.begin(), node_vec.end(), node); if (iter == node_vec.end()) { @@ -799,7 +819,8 @@ graphStatus ComputeGraph::CollectBreadthOutNode(const NodePtr &node, std::mapGetOpDesc()); + GE_IF_BOOL_EXEC(is_unknown_graph, continue); GE_CHK_BOOL_EXEC(node_ptr->GetOpDesc()->CommonVerify() == GRAPH_SUCCESS, return GRAPH_FAILED, "Verifying %s failed.", node_ptr->GetName().c_str()); } diff --git a/src/common/graph/ge_attr_define.cc b/src/common/graph/ge_attr_define.cc index 90f1bc6a..f78ca7aa 100644 --- a/src/common/graph/ge_attr_define.cc +++ b/src/common/graph/ge_attr_define.cc @@ -158,6 +158,10 @@ const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size"; const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = "_dynamic_output_dims"; const std::string ATTR_NAME_INPUT_ORIGIN_SIZE = "input_origin_size"; +// Identify node connecting to input and output +const std::string ATTR_NAME_NODE_CONNECT_INPUT = "_is_connected_to_data"; +const std::string ATTR_NAME_NODE_CONNECT_OUTPUT = "_is_connected_to_netoutput"; + // To be deleted const std::string ATTR_TO_BE_DELETED = "to_be_deleted"; const std::string PERMUTE_RESHAPE_FUSION = "permute_reshape_fusion"; @@ -905,6 +909,7 @@ const std::string ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE = "is_end_of_inputmem_l const std::string ATTR_NAME_PRED_VALUE = "_pred_value"; const std::string ATTR_NAME_BATCH_NUM = "_batch_num"; const std::string ATTR_NAME_BATCH_LABEL = "_batch_label"; +const std::string ATTR_NAME_COMBINED_BATCH = "_combined_batch"; // Control flow const std::string ATTR_NAME_STREAM_SWITCH_COND = "switch_condition"; @@ -914,6 +919,7 @@ const std::string ATTR_NAME_SWITCHN_PRED_VALUE = "switch_pred_value"; const std::string ATTR_NAME_ITERATORS_PER_LOOP = "iterations_per_loop"; const std::string 
ATTR_NAME_FLOW_CTRL_NODE_FLAG = "is_flow_ctrl_node"; const std::string ATTR_NAME_SUBGRAPH_FIRST_ACTIVE = "subgraph_first_active"; +const std::string ATTR_NAME_COMBINED_DYNAMIC_DIMS = "combined_dynamic_dims"; const std::string ATTR_NAME_SWITCH_BRANCH_NODE_LABEL = "_switch_branch_node_label"; const std::string ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG = "_switch_true_branch_flag"; @@ -983,6 +989,8 @@ const std::string ATTR_INSERT_BY_MBATCH = "mbatch-inserted-node"; const std::string ATTR_MBATCH_ORIGIN_INPUT_DIMS = "_mbatch_origin_input_dims"; +const std::string ATTR_DYNAMIC_TYPE = "mbatch_dynamic_type"; + // For inserted op const std::string ATTR_INSERTED_BY_GE = "_inserted_by_ge"; @@ -1021,6 +1029,10 @@ const std::string ATTR_NAME_VALID_OUTPUT_SHAPE_LIST_LIST = "_valid_output_shape_ const std::string ATTR_NAME_SLICE_INPUT_OFFSET_LIST_LIST = "_input_offset_list_list"; const std::string ATTR_NAME_SLICE_OUTPUT_OFFSET_LIST_LIST = "_output_offset_list_list"; +// for unregistered op +const std::string ATTR_NAME_UNREGST_OPPATH = "_unregst_oppath"; +const std::string ATTR_NAME_UNREGST_ATTRLIST = "_unregst_attrlist"; + // used for Horovod const std::string ATTR_INTER_EVENT_IDENTIFY = "event_id"; const std::string ATTR_HOROVOD_ATTR_REDUCE_TYPE = "reduce_op"; @@ -1032,6 +1044,23 @@ const std::string ATTR_NAME_HCCL_FUSED_FLAG = "_hccl_fused_node"; const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR = "_alloc_fixed_addr"; const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX = "_alloc_fixed_addr_index"; +// atc user def dtype&format +const std::string ATTR_ATC_USER_DEFINE_DATATYPE = "_user_defined_data_type"; +const std::string ATTR_ATC_USER_DEFINE_FORMAT = "_user_defined_format"; + // for fusion op plugin const std::string ATTR_NAME_FUSIONOP_ORIGINAL_TYPE = "_fusionop_original_type"; + +// graph partition for aicpu +const std::string ATTR_NAME_PLD_FRONT_NODE_ENGINE_NAME = "pld_front_node_engine_name"; +const std::string ATTR_NAME_END_REAR_NODE_ENGINE_NAME = "end_rear_node_engine_name"; + 
+// input and output memory type +const std::string ATTR_VARIABLE_PLACEMENT = "_variable_placement"; +const std::string ATTR_INPUT_MEMORY_TYPE = "_input_memory_type"; +const std::string ATTR_OUTPUT_MEMORY_TYPE = "_output_memory_type"; + +// input_output_offset +const std::string ATTR_ZERO_COPY_BASIC_OFFSET = "_zero_copy_basic_offset"; +const std::string ATTR_ZERO_COPY_RELATIVE_OFFSET = "_zero_copy_relative_offset"; } // namespace ge diff --git a/src/common/graph/ge_attr_value.cc b/src/common/graph/ge_attr_value.cc index 3a1dec6d..8a62134f 100644 --- a/src/common/graph/ge_attr_value.cc +++ b/src/common/graph/ge_attr_value.cc @@ -1216,27 +1216,16 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDescPtr AttrUtils::CloneOpDesc( GE_CHK_BOOL_EXEC(imp.UnserializeOpDesc(op_desc, *op_def), return op_desc, "op_desc unserialize failed"); op_desc->extAttrs_ = org_op_desc->extAttrs_; - if (op_desc->HasAttr("_input_name_idx_key")) { - if (op_desc->DelAttr("_input_name_idx_key") != SUCCESS) { - GELOGE(GRAPH_FAILED, "DelAttr _input_name_idx_key failed."); - } + // This function may be called by some passes of fusion engine, in this condition, do not need these attribute + if (!op_desc->input_name_idx_.empty()) { + op_desc->input_name_idx_.clear(); } - - if (op_desc->HasAttr("_input_name_idx_value")) { - if (op_desc->DelAttr("_input_name_idx_value") != SUCCESS) { - GELOGE(GRAPH_FAILED, "DelAttr _input_name_idx_value failed."); - } - } - - if (op_desc->HasAttr("_opt_input")) { - if (op_desc->DelAttr("_opt_input") != SUCCESS) { - GELOGE(GRAPH_FAILED, "DelAttr _opt_input failed."); - } - } - if (!op_desc->output_name_idx_.empty()) { op_desc->output_name_idx_.clear(); } + if (!op_desc->optional_input_names_.empty()) { + op_desc->optional_input_names_.clear(); + } return op_desc; } @@ -1260,6 +1249,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDescPtr AttrUtils::CopyOpDesc(c op_desc->extAttrs_ = org_op_desc->extAttrs_; + 
op_desc->input_name_idx_.insert(org_op_desc->input_name_idx_.begin(), org_op_desc->input_name_idx_.end()); + op_desc->optional_input_names_.insert(org_op_desc->optional_input_names_.begin(), + org_op_desc->optional_input_names_.end()); op_desc->output_name_idx_.insert(org_op_desc->output_name_idx_.begin(), org_op_desc->output_name_idx_.end()); op_desc->infer_func_ = org_op_desc->infer_func_; diff --git a/src/common/graph/graph.mk b/src/common/graph/graph.mk index 14e8b4b1..b007dac8 100644 --- a/src/common/graph/graph.mk +++ b/src/common/graph/graph.mk @@ -124,6 +124,7 @@ LOCAL_SRC_FILES := \ ../../out/graph/lib64/stub/operator.cc \ ../../out/graph/lib64/stub/operator_factory.cc \ ../../out/graph/lib64/stub/tensor.cc \ + ../../out/graph/lib64/stub/inference_context.cc \ LOCAL_SHARED_LIBRARIES := @@ -201,6 +202,7 @@ LOCAL_SRC_FILES := \ ../../out/graph/lib64/stub/operator.cc \ ../../out/graph/lib64/stub/operator_factory.cc \ ../../out/graph/lib64/stub/tensor.cc \ + ../../out/graph/lib64/stub/inference_context.cc \ LOCAL_SHARED_LIBRARIES := diff --git a/src/common/graph/model_serialize.cc b/src/common/graph/model_serialize.cc index 4bd5769f..b0077495 100644 --- a/src/common/graph/model_serialize.cc +++ b/src/common/graph/model_serialize.cc @@ -128,21 +128,42 @@ bool ModelSerializeImp::SerializeOpDesc(const ConstOpDescPtr &op_desc, proto::Op for (const std::string &name : op_desc->GetSubgraphInstanceNames()) { op_def_proto->add_subgraph_name(name); } - if (!op_desc->output_name_idx_.empty()) { - proto::AttrDef key; - proto::AttrDef value; - for (auto &item : op_desc->output_name_idx_) { - key.mutable_list()->add_s(item.first); - value.mutable_list()->add_i(item.second); - } - auto op_desc_attr = op_def_proto->mutable_attr(); - op_desc_attr->insert({"_output_name_key", key}); - op_desc_attr->insert({"_output_name_value", value}); - } + OpDescToAttrDef(op_desc, op_def_proto); } return true; } +void ModelSerializeImp::OpDescToAttrDef(const ConstOpDescPtr &op_desc, 
proto::OpDef *op_def_proto) { + proto::AttrDef key_in; + proto::AttrDef value_in; + auto op_desc_attr = op_def_proto->mutable_attr(); + if (!op_desc->input_name_idx_.empty()) { + for (auto &item : op_desc->input_name_idx_) { + key_in.mutable_list()->add_s(item.first); + value_in.mutable_list()->add_i(item.second); + } + op_desc_attr->insert({"_input_name_key", key_in}); + op_desc_attr->insert({"_input_name_value", value_in}); + } + proto::AttrDef key_out; + proto::AttrDef value_out; + if (!op_desc->output_name_idx_.empty()) { + for (auto &item : op_desc->output_name_idx_) { + key_out.mutable_list()->add_s(item.first); + value_out.mutable_list()->add_i(item.second); + } + op_desc_attr->insert({"_output_name_key", key_out}); + op_desc_attr->insert({"_output_name_value", value_out}); + } + proto::AttrDef opt_input; + if (!op_desc->optional_input_names_.empty()) { + for (auto &item : op_desc->optional_input_names_) { + opt_input.mutable_list()->add_s(item); + } + op_desc_attr->insert({"_opt_input", opt_input}); + } +} + bool ModelSerializeImp::SerializeNode(const NodePtr &node, proto::OpDef *op_def_proto, bool is_dump) { if (node == nullptr || op_def_proto == nullptr) { GELOGE(GRAPH_FAILED, "Input Para Node Invalid"); @@ -236,13 +257,70 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ModelSerializeImp::Unseriali } } +void ModelSerializeImp::AttrDefToOpDesc(OpDescPtr &op_desc, std::vector &key_in, std::vector &key_out, + std::vector &value_in, std::vector &value_out, + std::vector &opt_input) { + if (!key_in.empty()) { + if (key_in.size() != value_in.size()) { + GELOGW("Key and value vector size is different. key_size: %zu, value_size: %zu.", key_out.size(), + value_in.size()); + } else { + for (uint32_t i = 0; i < key_in.size(); ++i) { + op_desc->input_name_idx_.insert(std::pair(key_in.at(i), value_in.at(i))); + } + } + } + if (!key_out.empty()) { + if (key_out.size() != value_out.size()) { + GELOGW("Key and value vector size is different. 
key_size: %zu, value_size: %zu.", key_out.size(), + value_out.size()); + } else { + for (uint32_t i = 0; i < key_out.size(); ++i) { + op_desc->output_name_idx_.insert(std::pair(key_out.at(i), value_out.at(i))); + } + } + } + if (!opt_input.empty()) { + for (const auto &i : opt_input) { + op_desc->optional_input_names_.insert(i); + } + } +} + bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_def_proto) { - std::vector key; - std::vector value; + std::vector opt_input; + std::vector key_in; + std::vector value_in; + if (op_def_proto.attr().count("_opt_input") > 0) { + auto &name_list = op_def_proto.attr().at("_opt_input").list(); + for (const auto &item_s : name_list.s()) { + opt_input.push_back(item_s); + } + auto op_desc_attr = op_def_proto.mutable_attr(); + op_desc_attr->erase("_opt_input"); + } + if (op_def_proto.attr().count("_input_name_key") > 0) { + auto &output_name_key_list = op_def_proto.attr().at("_input_name_key").list(); + for (const auto &item_s : output_name_key_list.s()) { + key_in.push_back(item_s); + } + auto op_desc_attr = op_def_proto.mutable_attr(); + op_desc_attr->erase("_input_name_key"); + } + if (op_def_proto.attr().count("_input_name_value") > 0) { + auto &input_name_value_list = op_def_proto.attr().at("_input_name_value").list(); + for (const auto &item_i : input_name_value_list.i()) { + value_in.push_back(static_cast(item_i)); + } + auto op_desc_attr = op_def_proto.mutable_attr(); + op_desc_attr->erase("_input_name_value"); + } + std::vector key_out; + std::vector value_out; if (op_def_proto.attr().count("_output_name_key") > 0) { auto &output_name_key_list = op_def_proto.attr().at("_output_name_key").list(); for (const auto &item_s : output_name_key_list.s()) { - key.push_back(item_s); + key_out.push_back(item_s); } auto op_desc_attr = op_def_proto.mutable_attr(); op_desc_attr->erase("_output_name_key"); @@ -250,7 +328,7 @@ bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_d if 
(op_def_proto.attr().count("_output_name_value") > 0) { auto &output_name_value_list = op_def_proto.attr().at("_output_name_value").list(); for (const auto &item_i : output_name_value_list.i()) { - value.push_back(static_cast(item_i)); + value_out.push_back(static_cast(item_i)); } auto op_desc_attr = op_def_proto.mutable_attr(); op_desc_attr->erase("_output_name_value"); @@ -281,15 +359,8 @@ bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_d op_desc->SetSubgraphInstanceName(graph_index++, name); } - if (key.size() != 0) { - if (key.size() != value.size()) { - GELOGE(GRAPH_FAILED, "twe vector size is different. key_size: %zu, value_size: %zu.", key.size(), value.size()); - } else { - for (uint32_t i = 0; i < key.size(); ++i) { - op_desc->output_name_idx_.insert(std::pair(key.at(i), value.at(i))); - } - } - } + // insert name index by key and value + AttrDefToOpDesc(op_desc, key_in, key_out, value_in, value_out, opt_input); return true; } diff --git a/src/common/graph/node.cc b/src/common/graph/node.cc index df8efd91..b210957d 100644 --- a/src/common/graph/node.cc +++ b/src/common/graph/node.cc @@ -449,9 +449,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY InDataAnchorPtr Node::GetInDataAn GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY AnchorPtr Node::GetInAnchor(int idx) const { // Idx can't be less than -1 or >= in_data_anchors_.size(), -1 means index of control anchor_ if (idx < -1 || idx >= static_cast(in_data_anchors_.size())) { - ErrorManager::GetInstance().ATCReportErrMessage( - "E19019", {"opname", "index", "anchorname", "optype"}, - {GetName().c_str(), std::to_string(idx), "in_anchor", GetType().c_str()}); GELOGW("Op[%s] doesn't have index[%d]'s in_anchor which optype is %s.", GetName().c_str(), idx, GetType().c_str()); return nullptr; } else { @@ -743,26 +740,27 @@ graphStatus Node::Verify() const { const string aipp_data_type = "AippData"; const string const_type = "Const"; const string variable_type = "Variable"; + bool 
is_unknown_graph = GetOwnerComputeGraph()->GetGraphUnknownFlag(); GE_CHK_BOOL_EXEC(op_ != nullptr, return GRAPH_FAILED, "original OpDesc is nullptr"); - for (const auto &in_anchor_ptr : GetAllInDataAnchors()) { - if (in_anchor_ptr == nullptr) { - GELOGW("in anchor ptr is null"); - continue; - } - bool valid_anchor = op_->GetType() == data_type || op_->GetType() == aipp_data_type || - op_->GetType() == const_type || op_->GetType() == variable_type || - op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || in_anchor_ptr->GetPeerAnchors().size() > 0; - if (!valid_anchor) { - ErrorManager::GetInstance().ATCReportErrMessage("E11019", {"name", "index"}, - {GetName(), std::to_string(in_anchor_ptr->GetIdx())}); - GELOGE(GRAPH_FAILED, "operator %s's input %d is not linked.", GetName().c_str(), in_anchor_ptr->GetIdx()); - return GRAPH_FAILED; + if (!is_unknown_graph) { + for (const auto &in_anchor_ptr : GetAllInDataAnchors()) { + GE_IF_BOOL_EXEC(in_anchor_ptr == nullptr, GELOGW("in anchor ptr is null"); continue); + bool valid_anchor = op_->GetType() == data_type || op_->GetType() == aipp_data_type || + op_->GetType() == const_type || op_->GetType() == variable_type || + op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || in_anchor_ptr->GetPeerAnchors().size() > 0; + if (!valid_anchor) { + ErrorManager::GetInstance().ATCReportErrMessage("E11019", {"opname", "index"}, + {GetName(), std::to_string(in_anchor_ptr->GetIdx())}); + GELOGE(GRAPH_FAILED, "operator %s's input %d is not linked.", GetName().c_str(), in_anchor_ptr->GetIdx()); + return GRAPH_FAILED; + } } } string frameworkop_type = "FrameworkOp"; - if (op_->GetType() != frameworkop_type) { + bool need_update_name = op_->GetType() != frameworkop_type && !is_unknown_graph; + if (need_update_name) { auto node_op = ge::OperatorFactoryImpl::CreateOperator("node_op", op_->GetType()); if (node_op.IsEmpty()) { GELOGW("get op from OperatorFactory fail. 
opType: %s", op_->GetType().c_str()); @@ -782,7 +780,7 @@ graphStatus Node::Verify() const { } node_op.BreakConnect(); } - + GE_IF_BOOL_EXEC(is_unknown_graph, return GRAPH_SUCCESS;); if (op_->CommonVerify() == GRAPH_SUCCESS) { Operator op_proxy = ge::OpDescUtils::CreateOperatorFromNode(shared_from_this()); auto verify_func = op_->GetVerifyFunc(); diff --git a/src/common/graph/op_desc.cc b/src/common/graph/op_desc.cc index e9436a32..24a044f4 100644 --- a/src/common/graph/op_desc.cc +++ b/src/common/graph/op_desc.cc @@ -64,12 +64,6 @@ const std::string ATTR_NAME_IS_INPUT_CONST = "is_input_const"; const std::string ATTR_NAME_OP_INFER_DEPENDS = "_op_infer_depends"; -const std::string ATTR_NAME_OPT_INPUT = "_opt_input"; - -const std::string ATTR_NAME_INPUT_NAME_IDX_KEY = "_input_name_idx_key"; - -const std::string ATTR_NAME_INPUT_NAME_IDX_VALUE = "_input_name_idx_value"; - GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDesc::OpDesc() { op_def_.InitDefault(); if (op_def_.GetProtoMsg() != nullptr) { @@ -211,8 +205,7 @@ graphStatus OpDesc::AddInputDesc(uint32_t index, const ge::GeTensorDesc &input_d } graphStatus OpDesc::AddInputDesc(const string &name, const ge::GeTensorDesc &input_desc) { - auto input_name_idx = GetAllInputName(); - if (input_name_idx.find(name) != input_name_idx.end()) { + if (input_name_idx_.find(name) != input_name_idx_.end()) { GELOGI("input %s is exist, update it", name.c_str()); graphStatus ret = UpdateInputDesc(name, input_desc); return ret; @@ -224,17 +217,15 @@ graphStatus OpDesc::AddInputDesc(const string &name, const ge::GeTensorDesc &inp return GRAPH_FAILED; } inputs_desc_.push_back(in_desc); - (void)input_name_idx.insert(make_pair(name, index)); - SetAllInputName(input_name_idx); + (void)input_name_idx_.insert(make_pair(name, index)); return GRAPH_SUCCESS; } } graphStatus OpDesc::AddInputDescMiddle(const string &name, const unsigned int num, size_t index) { - auto input_name_idx = GetAllInputName(); for (unsigned int i = 0; i < num; i++) { 
string input_name = name + std::to_string(i); - GE_CHK_BOOL_RET_STATUS((input_name_idx.find(input_name) == input_name_idx.end()), GRAPH_FAILED, + GE_CHK_BOOL_RET_STATUS((input_name_idx_.find(input_name) == input_name_idx_.end()), GRAPH_FAILED, "Add input tensor_desc is existed. name[%s]", input_name.c_str()); std::shared_ptr in_desc = ComGraphMakeShared(GeTensorDesc()); @@ -251,24 +242,22 @@ graphStatus OpDesc::AddInputDescMiddle(const string &name, const unsigned int nu (void)inputs_desc_.insert(inputs_desc_.begin() + index + i, in_desc); // Update index in input_name_idx - for (auto it = input_name_idx.begin(); it != input_name_idx.end(); ++it) { + for (auto it = input_name_idx_.begin(); it != input_name_idx_.end(); ++it) { if (it->second >= (index + i)) { it->second += 1; } } - (void)input_name_idx.insert(make_pair(input_name, i + index)); + (void)input_name_idx_.insert(make_pair(input_name, i + index)); } - SetAllInputName(input_name_idx); return GRAPH_SUCCESS; } graphStatus OpDesc::AddInputDescForward(const string &name, const unsigned int num) { - auto input_name_idx = GetAllInputName(); for (unsigned int i = 0; i < num; i++) { string input_name = name + std::to_string(i); - GE_CHK_BOOL_RET_STATUS((input_name_idx.find(input_name) == input_name_idx.end()), GRAPH_FAILED, + GE_CHK_BOOL_RET_STATUS((input_name_idx_.find(input_name) == input_name_idx_.end()), GRAPH_FAILED, "Add input tensor_desc is existed. 
name[%s]", input_name.c_str()); std::shared_ptr in_desc = ComGraphMakeShared(GeTensorDesc()); @@ -279,13 +268,12 @@ graphStatus OpDesc::AddInputDescForward(const string &name, const unsigned int n (void)inputs_desc_.insert(inputs_desc_.begin(), in_desc); // Update index in input_name_idx - for (auto it = input_name_idx.begin(); it != input_name_idx.end(); ++it) { + for (auto it = input_name_idx_.begin(); it != input_name_idx_.end(); ++it) { it->second += 1; } - (void)input_name_idx.insert(make_pair(input_name, 0)); + (void)input_name_idx_.insert(make_pair(input_name, 0)); } - SetAllInputName(input_name_idx); return GRAPH_SUCCESS; } @@ -316,19 +304,10 @@ graphStatus OpDesc::AddOutputDescForward(const string &name, const unsigned int graphStatus OpDesc::AddOptionalInputDesc(const string &name, const ge::GeTensorDesc &input_desc) { if (OpDesc::AddInputDesc(name, input_desc) == GRAPH_FAILED) return GRAPH_FAILED; - vector optional_input_names; - (void)AttrUtils::GetListStr(this, ATTR_NAME_OPT_INPUT, optional_input_names); - optional_input_names.push_back(name); - (void)AttrUtils::SetListStr(this, ATTR_NAME_OPT_INPUT, optional_input_names); + (void)optional_input_names_.insert(name); return GRAPH_SUCCESS; } -std::vector OpDesc::GetAllOptionalInputName() const { - vector optional_input_names; - (void)AttrUtils::GetListStr(this, ATTR_NAME_OPT_INPUT, optional_input_names); - return optional_input_names; -} - GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus OpDesc::UpdateInputDesc(uint32_t index, const ge::GeTensorDesc &tensor_Desc) { GE_CHK_BOOL_RET_STATUS((index < inputs_desc_.size()), GRAPH_FAILED, "The index is invalid. 
index[%u]", index); @@ -343,12 +322,11 @@ OpDesc::UpdateInputDesc(uint32_t index, const ge::GeTensorDesc &tensor_Desc) { } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDesc::OpDescMembersAreEqual(const OpDesc &r_op_desc) const { - return ( - IsEqual(this->GetAllInputName(), r_op_desc.GetAllInputName(), "OpDesc.GetAllInputName()") && - IsEqual(this->output_name_idx_, r_op_desc.output_name_idx_, "OpDesc.output_name_idx_") && - IsEqual(this->GetAllOptionalInputName(), r_op_desc.GetAllOptionalInputName(), "OpDesc.GetAllOptionalInputName()") && - IsEqual(this->engine_name_, r_op_desc.engine_name_, "OpDesc.engine_name_") && - IsEqual(this->op_kernel_lib_name_, r_op_desc.op_kernel_lib_name_, "OpDesc.op_kernel_lib_name_")); + return (IsEqual(this->input_name_idx_, r_op_desc.input_name_idx_, "OpDesc.input_name_idx_") && + IsEqual(this->output_name_idx_, r_op_desc.output_name_idx_, "OpDesc.output_name_idx_") && + IsEqual(this->optional_input_names_, r_op_desc.optional_input_names_, "OpDesc.optional_input_names_") && + IsEqual(this->engine_name_, r_op_desc.engine_name_, "OpDesc.engine_name_") && + IsEqual(this->op_kernel_lib_name_, r_op_desc.op_kernel_lib_name_, "OpDesc.op_kernel_lib_name_")); } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDesc::OpDescAttrsAreEqual(const OpDesc &r_op_desc) const { @@ -422,9 +400,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDesc::operator==(const OpD } graphStatus OpDesc::UpdateInputDesc(const string &name, const ge::GeTensorDesc &tensor_Desc) { - auto input_name_idx = GetAllInputName(); - auto it = input_name_idx.find(name); - if (it == input_name_idx.end()) { + auto it = input_name_idx_.find(name); + if (it == input_name_idx_.end()) { GELOGW("Cann't find the input desc. 
name[%s]", name.c_str()); return GRAPH_FAILED; } @@ -444,9 +421,8 @@ graphStatus OpDesc::UpdateInputDesc(const string &name, const ge::GeTensorDesc & } bool OpDesc::InputIsSet(const string &name) const { - auto input_name_idx = GetAllInputName(); - auto it = input_name_idx.find(name); - if (it != input_name_idx.end()) { + auto it = input_name_idx_.find(name); + if (it != input_name_idx_.end()) { GE_IF_BOOL_EXEC(it->second >= inputs_desc_.size(), GELOGE(GRAPH_FAILED, "it->second is invalid."); return false); auto tensor_desc = inputs_desc_[it->second]; GE_IF_BOOL_EXEC(tensor_desc == nullptr, GELOGE(GRAPH_FAILED, "tensor_desc is null."); return false); @@ -464,9 +440,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensorDesc OpDesc::GetInputDesc } GeTensorDesc OpDesc::GetInputDesc(const string &name) const { - auto input_name_idx = GetAllInputName(); - auto it = input_name_idx.find(name); - GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx.end(), GeTensorDesc()); + auto it = input_name_idx_.find(name); + GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx_.end(), GeTensorDesc()); GE_CHK_BOOL_RET_STATUS_NOLOG(it->second < inputs_desc_.size(), GeTensorDesc()); return *(inputs_desc_[it->second].get()); } @@ -476,7 +451,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensorDescPtr OpDesc::MutableIn if (inputs_desc_[index] == nullptr) { return nullptr; } - GE_CHK_BOOL_RET_STATUS(inputs_desc_[index]->IsValid() == GRAPH_SUCCESS, nullptr, "input desc is invalid"); + if (inputs_desc_[index]->IsValid() != GRAPH_SUCCESS) { + GELOGW("input desc is invalid"); + return nullptr; + } return inputs_desc_[index]; } @@ -491,12 +469,11 @@ GeTensorDescPtr OpDesc::MutableInputDesc(const string &name) const { } GE_FUNC_HOST_VISIBILITY OpDesc::Vistor OpDesc::GetAllInputNames() const { - auto input_name_idx = GetAllInputName(); vector names; - if (input_name_idx.empty()) { + if (input_name_idx_.empty()) { return OpDesc::Vistor(shared_from_this(), names); } - for (std::pair input : 
input_name_idx) { + for (std::pair input : input_name_idx_) { names.push_back(input.first); } return OpDesc::Vistor(shared_from_this(), names); @@ -672,9 +649,8 @@ OpDesc::GetInputDescPtrDfault(uint32_t index) const { } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ConstGeTensorDescPtr OpDesc::GetInputDescPtr(const string &name) const { - auto input_name_idx = GetAllInputName(); - auto it = input_name_idx.find(name); - GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx.end(), shared_ptr()); + auto it = input_name_idx_.find(name); + GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx_.end(), shared_ptr()); return inputs_desc_[it->second]; } @@ -708,45 +684,12 @@ graphStatus OpDesc::AddDynamicOutputDesc(const string &name, const unsigned int } bool OpDesc::IsOptionalInput(const string &name) const { - vector optional_input_names; - (void)AttrUtils::GetListStr(this, ATTR_NAME_OPT_INPUT, optional_input_names); - for (auto &item : optional_input_names) { - if (item == name) { - return true; - } - } - return false; + return optional_input_names_.find(name) != optional_input_names_.end(); } bool OpDesc::IsOptionalInput(uint32_t index) const { return IsOptionalInput(GetInputNameByIndex(index)); } -std::map OpDesc::GetAllInputName() const { - std::map input_name_idx; - std::vector key; - std::vector value; - (void)AttrUtils::GetListStr(this, ATTR_NAME_INPUT_NAME_IDX_KEY, key); - (void)AttrUtils::GetListInt(this, ATTR_NAME_INPUT_NAME_IDX_VALUE, value); - - if (key.size() != value.size()) { - GE_LOGE("twe vector size is different. 
key_size: %zu, value_size: %zu.", key.size(), value.size()); - } else { - for (uint32_t i = 0; i < key.size(); ++i) { - input_name_idx.insert(std::pair(key.at(i), value.at(i))); - } - } - return input_name_idx; -} - -void OpDesc::SetAllInputName(const std::map &input_name_idx) { - std::vector key; - std::vector value; - for (auto &item : input_name_idx) { - key.emplace_back(item.first); - value.emplace_back(item.second); - } - (void)AttrUtils::SetListStr(this, ATTR_NAME_INPUT_NAME_IDX_KEY, key); - (void)AttrUtils::SetListInt(this, ATTR_NAME_INPUT_NAME_IDX_VALUE, value); -} +std::map OpDesc::GetAllInputName() const { return input_name_idx_; } std::map OpDesc::GetAllOutputName() { return output_name_idx_; } @@ -757,7 +700,6 @@ bool OpDesc::UpdateInputName(std::map input_name_idx) { auto factory_map_size = input_name_idx.size(); // It indicates that some inputs have no optionalname. // The redundant optionalname of factory needs to be deleted and then assigned - auto all_input_name_idx = GetAllInputName(); if (input_map_size < factory_map_size) { GELOGI("UpdateInputName org inputname map size: %zu, factory inputname map size: %zu", input_map_size, factory_map_size); @@ -770,18 +712,17 @@ bool OpDesc::UpdateInputName(std::map input_name_idx) { } if (input_name_idx.size() == input_map_size) { GELOGI("UpdateInputName"); - all_input_name_idx = input_name_idx; + input_name_idx_ = input_name_idx; } else { ret = false; GELOGW("after UpdateInputName factoryName map size : %zu", input_name_idx.size()); } } else if (input_map_size == factory_map_size) { - all_input_name_idx = input_name_idx; + input_name_idx_ = input_name_idx; } else { ret = false; GELOGW("org inputname map size: %zu, factory inputname map size: %zu", input_map_size, factory_map_size); } - SetAllInputName(all_input_name_idx); return ret; } @@ -924,21 +865,19 @@ graphStatus OpDesc::CommonVerify() const { } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY string OpDesc::GetInputNameByIndex(uint32_t index) const { - 
auto input_name_idx = GetAllInputName(); - auto it = input_name_idx.begin(); - for (; it != input_name_idx.end(); ++it) { + auto it = input_name_idx_.begin(); + for (; it != input_name_idx_.end(); ++it) { if (it->second == index) { break; } } - GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx.end(), ""); + GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx_.end(), ""); return it->first; } int OpDesc::GetInputIndexByName(const string &name) const { - auto input_name_idx = GetAllInputName(); - auto it_find = input_name_idx.find(name); - GE_CHK_BOOL_RET_STATUS_NOLOG(it_find != input_name_idx.end(), -1); + auto it_find = input_name_idx_.find(name); + GE_CHK_BOOL_RET_STATUS_NOLOG(it_find != input_name_idx_.end(), -1); return static_cast(it_find->second); } @@ -1231,12 +1170,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector OpDesc::GetIsInputCo GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus OpDesc::RestoreInputNameIdx(const string &name, const int &index) { - auto input_name_idx = GetAllInputName(); - if (input_name_idx.find(name) != input_name_idx.end()) { + if (input_name_idx_.find(name) != input_name_idx_.end()) { GELOGI("Restore input name index is existed. 
name[%s]", name.c_str()); } - (void)input_name_idx.insert(make_pair(name, index)); - SetAllInputName(input_name_idx); + (void)input_name_idx_.insert(make_pair(name, index)); return GRAPH_SUCCESS; } diff --git a/src/common/graph/ref_relation.cc b/src/common/graph/ref_relation.cc index 906cb5f9..68d4db82 100644 --- a/src/common/graph/ref_relation.cc +++ b/src/common/graph/ref_relation.cc @@ -170,6 +170,7 @@ graphStatus RefRelations::Impl::BuildRefRelationsForWhile( // data_nodes has been sorted // for while, input num must be same as output num auto input_num = root_node->GetAllInDataAnchorsSize(); + NodePtr netoutput = nullptr; size_t ref_i = 0; while (ref_i < input_num) { @@ -212,10 +213,44 @@ graphStatus RefRelations::Impl::BuildRefRelationsForWhile( cell_netoutput_in.in_out = NODE_IN; cell_netoutput_in.in_out_idx = ele.second; ref_i_all_refs.emplace_back(cell_netoutput_in); + netoutput = ele.first; } node_refs.emplace_back(ref_i_all_refs); ref_i++; } + /* There exist scene like the follows, it means data0 data1 netoutput 0'th + * and 1'th tensor should be the same addr. 
+ * Data0 Data1 + * \/ + * /\ + * netoutput + */ + if (netoutput == nullptr) { + return GRAPH_SUCCESS; + } + for (const auto &in_anchor : netoutput->GetAllInDataAnchors()) { + auto peer_out_data_anchor = in_anchor->GetPeerOutAnchor(); + if (peer_out_data_anchor == nullptr) { + continue; + } + auto peer_out_data_node = peer_out_data_anchor->GetOwnerNode(); + if (peer_out_data_node == nullptr || peer_out_data_node->GetOpDesc() == nullptr) { + GELOGW("Node[%s]\'s peer_out_data_node or peer_out_data_node desc is null", (netoutput->GetName()).c_str()); + continue; + } + if (peer_out_data_node->GetType() != DATA) { + continue; + } + auto in_data_anchor_idx = in_anchor->GetIdx(); + auto net_in_desc = netoutput->GetOpDesc()->MutableInputDesc(static_cast(in_data_anchor_idx)); + int ref_d; + int ref_n; + (void)AttrUtils::GetInt(peer_out_data_node->GetOpDesc(), kRefIndex, ref_d); + (void)AttrUtils::GetInt(net_in_desc, kRefIndex, ref_n); + + node_refs[ref_d].insert(node_refs[ref_d].end(), node_refs[ref_n].begin(), node_refs[ref_n].end()); + node_refs[ref_n].insert(node_refs[ref_n].end(), node_refs[ref_d].begin(), node_refs[ref_d].end()); + } return GRAPH_SUCCESS; } diff --git a/src/common/graph/shape_refiner.cc b/src/common/graph/shape_refiner.cc index dc1bc541..ec9a2290 100644 --- a/src/common/graph/shape_refiner.cc +++ b/src/common/graph/shape_refiner.cc @@ -49,10 +49,6 @@ graphStatus ReverseBrushWhileBodySubGraph(const ConstNodePtr &node) { } for (const auto &node_sub : sub_graph_body->GetAllNodes()) { - if (node_sub->GetInDataNodes().size() == 0) { - continue; - } - for (size_t i = 0; i < node_sub->GetAllInDataAnchorsSize(); i++) { auto input_desc = node_sub->GetOpDesc()->MutableInputDesc(i); (void)input_desc->SetUnknownDimNumShape(); @@ -303,11 +299,11 @@ graphStatus UpdateParentNodeOutTensor(const ConstNodePtr &node) { } } // namespace void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::string &phase) { - if (node == nullptr) { - 
GELOGE(GRAPH_FAILED, "node is null"); + if (!IsLogEnable(GE, DLOG_DEBUG)) { return; } - if (!IsLogEnable(GE, DLOG_DEBUG)) { + if (node == nullptr) { + GELOGE(GRAPH_FAILED, "node is null"); return; } ge::OpDescPtr op_desc = node->GetOpDesc(); @@ -325,6 +321,18 @@ void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::str TypeUtils::FormatToSerialString(input_desc->GetFormat()) + " "; } str += input_desc_str; + + input_desc_str = "input origin shape: "; + for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) { + input_desc_str += "["; + for (int64_t dim : input_desc->GetOriginShape().GetDims()) { + input_desc_str += std::to_string(dim) + " "; + } + input_desc_str += "]"; + input_desc_str += ":" + TypeUtils::DataTypeToSerialString(input_desc->GetOriginDataType()) + ":" + + TypeUtils::FormatToSerialString(input_desc->GetOriginFormat()) + " "; + } + str += input_desc_str; } if (op_desc->GetAllOutputsDescSize() != 0) { @@ -342,6 +350,21 @@ void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::str TypeUtils::FormatToSerialString(output_desc->GetFormat()) + " "; } str += output_desc_str; + + output_desc_str = "output origin shape: "; + for (const auto &output_desc : op_desc->GetAllOutputsDescPtr()) { + if (output_desc == nullptr) { + continue; + } + output_desc_str += "["; + for (int64_t dim : output_desc->GetOriginShape().GetDims()) { + output_desc_str += std::to_string(dim) + " "; + } + output_desc_str += "]"; + output_desc_str += ":" + TypeUtils::DataTypeToSerialString(output_desc->GetOriginDataType()) + ":" + + TypeUtils::FormatToSerialString(output_desc->GetOriginFormat()) + " "; + } + str += output_desc_str; } GELOGD("Shape dump [%s], Node name: [%s]. 
%s", phase.c_str(), node->GetName().c_str(), str.c_str()); } @@ -362,7 +385,6 @@ graphStatus ShapeRefiner::InferShapeAndType(const ConstNodePtr &node, Operator & return ret; } } - // Get infer func and execute ret = op_desc->CallInferFunc(op); if (ret == GRAPH_PARAM_INVALID) { @@ -479,19 +501,20 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferSh GELOGE(GRAPH_FAILED, "Verifying %s failed.", node->GetName().c_str()); return GRAPH_FAILED; } + PrintInOutTensorShape(node, "before_infershape"); + Operator op = OpDescUtils::CreateOperatorFromNode(node); - auto inference_context = CreateInferenceContext(context_map, node); - if (inference_context == nullptr) { - GELOGE(GRAPH_FAILED, "inference context is null"); - return GRAPH_FAILED; + bool is_unknown_graph = node->GetOwnerComputeGraph()->GetGraphUnknownFlag(); + if (!is_unknown_graph) { + auto inference_context = CreateInferenceContext(context_map, node); + if (inference_context == nullptr) { + GELOGE(GRAPH_FAILED, "inference context is null"); + return GRAPH_FAILED; + } + GELOGD("create context for node:%s, marks %zu", node->GetName().c_str(), inference_context->GetMarks().size()); + op.SetInferenceContext(inference_context); } - GELOGD("create context for node:%s, marks %zu", node->GetName().c_str(), inference_context->GetMarks().size()); - - PrintInOutTensorShape(node, "before_infershape"); - - Operator op = OpDescUtils::CreateOperatorFromNode(node); - op.SetInferenceContext(inference_context); graphStatus status = InferShapeAndType(node, op, before_subgraph); if (status == GRAPH_PARAM_INVALID || status == GRAPH_SUCCESS) { (void)ge::NodeUtils::UpdatePeerNodeInputDesc(node); @@ -499,16 +522,17 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferSh GELOGE(GRAPH_FAILED, "%s call infer function failed.", node->GetName().c_str()); return GRAPH_FAILED; } - - auto ctx_after_infer = op.GetInferenceContext(); - if (ctx_after_infer != nullptr) { - GELOGD("[%s] after 
infershape. mark:%zu", node->GetName().c_str(), ctx_after_infer->GetMarks().size()); - if (!ctx_after_infer->GetOutputHandleShapesAndTypes().empty() || !ctx_after_infer->GetMarks().empty()) { - GELOGD("[%s] set inference context after. mark:%zu", node->GetName().c_str(), ctx_after_infer->GetMarks().size()); - (void)context_map.emplace(node, ctx_after_infer); + if (!is_unknown_graph) { + auto ctx_after_infer = op.GetInferenceContext(); + if (ctx_after_infer != nullptr) { + GELOGD("[%s] after infershape. mark:%zu", node->GetName().c_str(), ctx_after_infer->GetMarks().size()); + if (!ctx_after_infer->GetOutputHandleShapesAndTypes().empty() || !ctx_after_infer->GetMarks().empty()) { + GELOGD("[%s] set inference context after. mark:%zu", node->GetName().c_str(), + ctx_after_infer->GetMarks().size()); + (void)context_map.emplace(node, ctx_after_infer); + } } } - PrintInOutTensorShape(node, "after_infershape"); return GRAPH_SUCCESS; diff --git a/src/common/graph/utils/ge_ir_utils.h b/src/common/graph/utils/ge_ir_utils.h index b572ab38..9b16be18 100644 --- a/src/common/graph/utils/ge_ir_utils.h +++ b/src/common/graph/utils/ge_ir_utils.h @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ #ifndef COMMON_GRAPH_UTILS_GE_IR_UTILS_H_ #define COMMON_GRAPH_UTILS_GE_IR_UTILS_H_ diff --git a/src/common/graph/utils/node_utils.cc b/src/common/graph/utils/node_utils.cc index 20bcacfb..0a3dbd2a 100644 --- a/src/common/graph/utils/node_utils.cc +++ b/src/common/graph/utils/node_utils.cc @@ -295,14 +295,16 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer if (op_desc == nullptr) { return GRAPH_FAILED; } + bool is_unknown_graph = node_ptr->GetOwnerComputeGraph()->GetGraphUnknownFlag(); + if (is_unknown_graph) { + return GRAPH_SUCCESS; + } for (const auto &out_anchor : node_ptr->GetAllOutDataAnchors()) { auto output_tensor = op_desc->MutableOutputDesc(out_anchor->GetIdx()); ge::TensorUtils::SetRealDimCnt(*output_tensor, static_cast(output_tensor->GetShape().GetDims().size())); - bool is_unknown_graph = node_ptr->GetOwnerComputeGraph()->GetGraphUnknownFlag(); - if (!is_unknown_graph) { - output_tensor->SetOriginShape(output_tensor->GetShape()); - output_tensor->SetOriginDataType(output_tensor->GetDataType()); - } + output_tensor->SetOriginShape(output_tensor->GetShape()); + output_tensor->SetOriginDataType(output_tensor->GetDataType()); + GELOGD("node name is %s, origin shape is %ld, origin format is %s, origin data type is %s", node_ptr->GetName().c_str(), output_tensor->GetOriginShape().GetShapeSize(), TypeUtils::FormatToSerialString(output_tensor->GetOriginFormat()).c_str(), @@ -321,8 +323,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer GELOGI("Peer input opdesc name is %s, need to flush: shape size is %zu, datatype is %d, original datatype is %d", peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), output_tensor->GetShape().GetDimNum(), output_tensor->GetDataType(), output_tensor->GetOriginDataType()); - peer_input_desc->SetShape(output_tensor->GetShape()); peer_input_desc->SetOriginShape(output_tensor->GetOriginShape()); + 
peer_input_desc->SetShape(output_tensor->GetShape()); peer_input_desc->SetDataType(output_tensor->GetDataType()); peer_input_desc->SetOriginDataType(output_tensor->GetOriginDataType()); std::vector> shape_range; @@ -337,6 +339,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer } return GRAPH_SUCCESS; } + bool NodeUtils::IsInNodesEmpty(const Node &node) { for (const auto &in_anchor : node.in_data_anchors_) { if (in_anchor != nullptr) { @@ -446,6 +449,7 @@ std::string NodeUtils::GetNodeType(const Node &node) { (void)AttrUtils::GetStr(node.GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type); return type; } + ComputeGraphPtr NodeUtils::GetSubgraph(const Node &node, uint32_t index) { auto op_desc = node.GetOpDesc(); if (op_desc == nullptr) { @@ -498,6 +502,14 @@ bool NodeUtils::IsSubgraphInput(const NodePtr &node) { return false; } if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) { + bool is_unknown_shape = false; + (void)AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape); + if (is_unknown_shape) return false; + } + + if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE) && + kCaseOpTypes.count(parent_op_desc->GetType()) == 0 && kWhileOpTypes.count(parent_op_desc->GetType()) == 0 && + kForOpTypes.count(parent_op_desc->GetType()) == 0 && kIfOpTypes.count(parent_op_desc->GetType()) == 0) { return false; } @@ -519,7 +531,16 @@ bool NodeUtils::IsSubgraphOutput(const NodePtr &node) { if (parent_op_desc == nullptr) { return false; } + if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) { + bool is_unknown_shape = false; + (void)AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape); + if (is_unknown_shape) return false; + } + + if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE) && + kCaseOpTypes.count(parent_op_desc->GetType()) == 0 && kWhileOpTypes.count(parent_op_desc->GetType()) == 0 && + 
kForOpTypes.count(parent_op_desc->GetType()) == 0 && kIfOpTypes.count(parent_op_desc->GetType()) == 0) { return false; } diff --git a/src/ge/CMakeLists.txt b/src/ge/CMakeLists.txt index 8d20caf2..e86b3237 100755 --- a/src/ge/CMakeLists.txt +++ b/src/ge/CMakeLists.txt @@ -95,7 +95,18 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" "graph/load/new_model_manager/task_info/task_info.cc" - "graph/manager/*.cc" + "graph/manager/graph_caching_allocator.cc" + "graph/manager/graph_context.cc" + "graph/manager/graph_manager.cc" + "graph/manager/graph_manager_utils.cc" + "graph/manager/graph_mem_allocator.cc" + "graph/manager/graph_var_manager.cc" + "graph/manager/model_manager/event_manager.cc" + "graph/manager/trans_var_data_utils.cc" + "graph/manager/util/debug.cc" + "graph/manager/util/hcom_util.cc" + "graph/manager/util/rt_context_util.cc" + "graph/manager/util/variable_accelerate_ctrl.cc" "graph/manager/model_manager/event_manager.cc" "graph/manager/util/debug.cc" "graph/manager/util/hcom_util.cc" @@ -240,7 +251,17 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" "graph/load/new_model_manager/task_info/task_info.cc" - "graph/manager/*.cc" + "graph/manager/graph_caching_allocator.cc" + "graph/manager/graph_context.cc" + "graph/manager/graph_manager.cc" + "graph/manager/graph_manager_utils.cc" + "graph/manager/graph_mem_allocator.cc" + "graph/manager/graph_var_manager.cc" + "graph/manager/model_manager/event_manager.cc" + "graph/manager/trans_var_data_utils.cc" + "graph/manager/util/debug.cc" + "graph/manager/util/rt_context_util.cc" + "graph/manager/util/variable_accelerate_ctrl.cc" "graph/manager/model_manager/event_manager.cc" 
"graph/manager/util/debug.cc" "graph/manager/util/rt_context_util.cc" diff --git a/src/ge/common/CMakeLists.txt b/src/ge/common/CMakeLists.txt index adcdb1bc..829691e7 100755 --- a/src/ge/common/CMakeLists.txt +++ b/src/ge/common/CMakeLists.txt @@ -54,6 +54,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "helper/om_file_helper.cc" "math/fp16_math.cc" "model_parser/base.cc" +# "model_parser/graph_parser_util.cc" "model_saver.cc" "op/attr_value_util.cc" "op/ge_op_utils.cc" diff --git a/src/ge/common/formats/utils/formats_trans_utils.h b/src/ge/common/formats/utils/formats_trans_utils.h index 7b902c3e..8b6f0604 100644 --- a/src/ge/common/formats/utils/formats_trans_utils.h +++ b/src/ge/common/formats/utils/formats_trans_utils.h @@ -21,7 +21,6 @@ #include #include #include - #include "external/graph/types.h" #include "graph/ge_tensor.h" diff --git a/src/ge/common/ge/tbe_plugin_manager.cc b/src/ge/common/ge/tbe_plugin_manager.cc index e02b9422..d651ced1 100644 --- a/src/ge/common/ge/tbe_plugin_manager.cc +++ b/src/ge/common/ge/tbe_plugin_manager.cc @@ -182,7 +182,7 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) { } void TBEPluginManager::LoadCustomOpLib() { - LoadPluginSo(); + LoadPluginSo(options_); std::vector registration_datas = domi::OpRegistry::Instance()->registrationDatas; GELOGI("The size of registration_datas is: %zu", registration_datas.size()); @@ -193,10 +193,13 @@ void TBEPluginManager::LoadCustomOpLib() { } } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPluginSo() { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPluginSo( + const std::map &options) { vector file_list; string caffe_parser_path; std::string plugin_path; + + options_ = options; GetCustomOpPath(plugin_path); // Whether there are files in the plugin so path diff --git a/src/ge/common/ge/tbe_plugin_manager.h b/src/ge/common/ge/tbe_plugin_manager.h index 82264ae8..2a55e450 100644 --- 
a/src/ge/common/ge/tbe_plugin_manager.h +++ b/src/ge/common/ge/tbe_plugin_manager.h @@ -48,7 +48,7 @@ class TBEPluginManager { static void InitPreparation(const std::map &options); - void LoadPluginSo(); + void LoadPluginSo(const std::map &options); private: TBEPluginManager() = default; diff --git a/src/ge/common/ge_common.mk b/src/ge/common/ge_common.mk index e913c8f5..e99ff654 100644 --- a/src/ge/common/ge_common.mk +++ b/src/ge/common/ge_common.mk @@ -36,6 +36,7 @@ GE_COMMON_LOCAL_SRC_FILES := \ properties_manager.cc \ types.cc\ model_parser/base.cc \ + model_parser/graph_parser_util.cc \ tbe_kernel_store.cc \ op/attr_value_util.cc \ op/ge_op_utils.cc \ diff --git a/src/ge/common/helper/model_helper.cc b/src/ge/common/helper/model_helper.cc index e1f7c75f..2450076a 100644 --- a/src/ge/common/helper/model_helper.cc +++ b/src/ge/common/helper/model_helper.cc @@ -91,9 +91,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod } auto ge_model_weight = ge_model->GetWeight(); GELOGI("WEIGHTS_DATA size is %zu , %p", ge_model_weight.GetSize(), ge_model_weight.GetData()); - if (SaveModelPartition(om_file_save_helper, ModelPartitionType::WEIGHTS_DATA, ge_model_weight.GetData(), - ge_model_weight.GetSize()) != SUCCESS) { - GELOGW("Add weight partition failed"); // weight is not necessary + // weight is not necessary + if (ge_model_weight.GetSize() > 0) { + GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, ModelPartitionType::WEIGHTS_DATA, + ge_model_weight.GetData(), ge_model_weight.GetSize()), + "Add weight partition failed"); } TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore(); @@ -239,45 +241,48 @@ ModelHelper::SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::strin FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(const ge::ModelData &model_data) { if (model_data.model_data == nullptr || model_data.model_len == 0) { - GELOGE(FAILED, "Model_data is nullptr, or model_data_size 
is 0"); - return FAILED; + GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "Model_data is nullptr, or model_data_size is 0"); + return GE_EXEC_MODEL_DATA_SIZE_INVALID; } if (is_assign_model_) { - GELOGE(FAILED, "Model helper has already loaded!"); - return FAILED; + GELOGE(GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!"); + return GE_EXEC_LOAD_MODEL_REPEATED; } if (ReleaseLocalModelData() != SUCCESS) { - GELOGE(FAILED, "ReleaseLocalModelData failed."); - return FAILED; + GELOGE(INTERNAL_ERROR, "ReleaseLocalModelData failed."); + return INTERNAL_ERROR; } + Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); if (ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_) != SUCCESS) { - GELOGE(FAILED, "Parse model content failed!"); - return FAILED; + GELOGE(status, "Parse model content failed!"); + return status; } file_header_ = reinterpret_cast(model_data.model_data); OmFileLoadHelper om_load_helper; - if (om_load_helper.Init(model_addr_tmp_, model_len_tmp_) != SUCCESS) { - GELOGE(FAILED, "Om_load_helper init failed"); + status = om_load_helper.Init(model_addr_tmp_, model_len_tmp_); + if (status != SUCCESS) { + GELOGE(status, "Om_load_helper init failed"); model_addr_tmp_ = nullptr; - return FAILED; + return status; } auto partition_table = reinterpret_cast(model_addr_tmp_); if (partition_table->num == kOriginalOmPartitionNum) { model_addr_tmp_ = nullptr; - GELOGE(FAILED, "om model is error,please use executable om model"); - return FAILED; + GELOGE(GE_EXEC_MODEL_PARTITION_NUM_INVALID, "om model is error,please use executable om model"); + return GE_EXEC_MODEL_PARTITION_NUM_INVALID; } // Encrypt model need to del temp model/no encrypt model don't need to del model model_addr_tmp_ = nullptr; - if (GenerateGeModel(om_load_helper) != SUCCESS) { - GELOGE(FAILED, "GenerateGeModel failed"); - return FAILED; + status = GenerateGeModel(om_load_helper); + if (status != SUCCESS) { + 
GELOGE(status, "GenerateGeModel failed"); + return status; } is_assign_model_ = true; @@ -289,19 +294,19 @@ Status ModelHelper::GenerateGeModel(OmFileLoadHelper &om_load_helper) { GE_CHECK_NOTNULL(model_); Status ret = LoadModelData(om_load_helper); if (ret != SUCCESS) { - return ret; + return GE_EXEC_LOAD_MODEL_PARTITION_FAILED; } ret = LoadWeights(om_load_helper); if (ret != SUCCESS) { - return ret; + return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED; } ret = LoadTask(om_load_helper); if (ret != SUCCESS) { - return ret; + return GE_EXEC_LOAD_TASK_PARTITION_FAILED; } ret = LoadTBEKernelStore(om_load_helper); if (ret != SUCCESS) { - return ret; + return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; } return SUCCESS; } diff --git a/src/ge/common/helper/om_file_helper.cc b/src/ge/common/helper/om_file_helper.cc index 0d58fe71..f25e2af3 100644 --- a/src/ge/common/helper/om_file_helper.cc +++ b/src/ge/common/helper/om_file_helper.cc @@ -41,8 +41,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(c FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(uint8_t *model_data, const uint32_t model_data_size) { - if (LoadModelPartitionTable(model_data, model_data_size) != SUCCESS) { - return FAILED; + Status status = LoadModelPartitionTable(model_data, model_data_size); + if (status != SUCCESS) { + return status; } is_inited_ = true; return SUCCESS; @@ -66,7 +67,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetMod } if (!found) { - if (type != ModelPartitionType::TBE_KERNELS) { + if (type != ModelPartitionType::TBE_KERNELS && type != ModelPartitionType::WEIGHTS_DATA) { GELOGE(FAILED, "GetModelPartition:type:%d is not in partition_datas!", static_cast(type)); return FAILED; } @@ -83,7 +84,9 @@ Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { // Model length too small if (model.model_len < (sizeof(ModelFileHeader) + sizeof(ModelPartitionTable))) { - GELOGE(PARAM_INVALID, 
"Invalid model. length < sizeof(ModelFileHeader) + sizeof(ModelPartitionTable)."); + GELOGE(PARAM_INVALID, + "Invalid model. length[%u] < sizeof(ModelFileHeader)[%zu] + sizeof(ModelPartitionTable)[%zu].", + model.model_len, sizeof(ModelFileHeader), sizeof(ModelPartitionTable)); return PARAM_INVALID; } @@ -93,9 +96,9 @@ Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { if ((model_header->length != model.model_len - sizeof(ModelFileHeader)) || (MODEL_FILE_MAGIC_NUM != model_header->magic)) { GELOGE(PARAM_INVALID, - "Invalid model. file_header->length(%u) + sizeof(ModelFileHeader)(%zu) != model->model_len(%u) || " - "MODEL_FILE_MAGIC_NUM != file_header->magic", - model_header->length, sizeof(ModelFileHeader), model.model_len); + "Invalid model. file_header->length[%u] + sizeof(ModelFileHeader)[%zu] != model->model_len[%u] || " + "MODEL_FILE_MAGIC_NUM[%u] != file_header->magic[%u]", + model_header->length, sizeof(ModelFileHeader), model.model_len, MODEL_FILE_MAGIC_NUM, model_header->magic); return PARAM_INVALID; } return SUCCESS; @@ -112,16 +115,16 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint // Original model partition include graph-info if ((partition_table->num != PARTITION_SIZE) && (partition_table->num != (PARTITION_SIZE - 1)) && (partition_table->num != 1)) { - GELOGE(PARAM_INVALID, "Invalid partition_table->num:%u", partition_table->num); - return PARAM_INVALID; + GELOGE(GE_EXEC_MODEL_PARTITION_NUM_INVALID, "Invalid partition_table->num:%u", partition_table->num); + return GE_EXEC_MODEL_PARTITION_NUM_INVALID; } size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); GELOGI("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu", partition_table->num, sizeof(ModelFileHeader), mem_offset); if (model_data_size <= mem_offset) { - GELOGE(PARAM_INVALID, "invalid model data, partition_table->num:%u, model data size %u", partition_table->num, - 
model_data_size); - return PARAM_INVALID; + GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", + partition_table->num, model_data_size); + return GE_EXEC_MODEL_DATA_SIZE_INVALID; } for (uint32_t i = 0; i < partition_table->num; i++) { ModelPartition partition; @@ -131,9 +134,9 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint context_.partition_datas_.push_back(partition); if (partition.size > model_data_size || mem_offset > model_data_size - partition.size) { - GELOGE(PARAM_INVALID, "The partition size %zu is greater than the model data size %u.", + GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.", partition.size + mem_offset, model_data_size); - return PARAM_INVALID; + return GE_EXEC_MODEL_DATA_SIZE_INVALID; } mem_offset += partition.size; GELOGI("Partition, type:%d, size:%u", static_cast(partition.type), partition.size); diff --git a/src/ge/common/model_parser/base.cc b/src/ge/common/model_parser/base.cc index fb6a647f..3b6b9407 100644 --- a/src/ge/common/model_parser/base.cc +++ b/src/ge/common/model_parser/base.cc @@ -35,15 +35,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro ge::ModelData &model_data) { std::string real_path = RealPath(model_path); if (real_path.empty()) { - GELOGE(PARAM_INVALID, "Model file path '%s' is invalid", model_path); - return PARAM_INVALID; + GELOGE(GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path); + return GE_EXEC_MODEL_PATH_INVALID; } - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(model_path) == -1, return FAILED, "File size not valid."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(model_path) == -1, return GE_EXEC_READ_MODEL_FILE_FAILED, + "File size not valid."); std::ifstream fs(real_path.c_str(), std::ifstream::binary); - GE_CHK_BOOL_RET_STATUS(fs.is_open(), FAILED, "Open file failed! 
path:%s", model_path); + GE_CHK_BOOL_RET_STATUS(fs.is_open(), GE_EXEC_READ_MODEL_FILE_FAILED, "Open file failed! path:%s", model_path); // get length of file: (void)fs.seekg(0, std::ifstream::end); @@ -55,7 +56,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro char *data = new (std::nothrow) char[len]; if (data == nullptr) { - GELOGE(MEMALLOC_FAILED, "Load model From file failed, bad memory allocation occur. (need:%ld)", len); + GELOGE(MEMALLOC_FAILED, "Load model From file failed, bad memory allocation occur. (need:%u)", len); return MEMALLOC_FAILED; } @@ -79,31 +80,33 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::ParseMo GE_CHECK_NOTNULL(model.model_data); // Model length too small - GE_CHK_BOOL_RET_STATUS(model.model_len >= sizeof(ModelFileHeader), PARAM_INVALID, - "Invalid model. length < sizeof(ModelFileHeader)."); + GE_CHK_BOOL_RET_STATUS(model.model_len >= sizeof(ModelFileHeader), GE_EXEC_MODEL_DATA_SIZE_INVALID, + "Invalid model. Model data size %u must be greater than or equal to %zu.", model.model_len, + sizeof(ModelFileHeader)); // Get file header auto file_header = reinterpret_cast(model.model_data); // Determine whether the file length and magic number match GE_CHK_BOOL_RET_STATUS( file_header->length == model.model_len - sizeof(ModelFileHeader) && file_header->magic == MODEL_FILE_MAGIC_NUM, - PARAM_INVALID, - "Invalid model. file_header->length + sizeof(ModelFileHeader) != model->model_len || MODEL_FILE_MAGIC_NUM != " - "file_header->magic"); + GE_EXEC_MODEL_DATA_SIZE_INVALID, + "Invalid model. 
file_header->length[%u] + sizeof(ModelFileHeader)[%zu] != model->model_len[%u] || " + "MODEL_FILE_MAGIC_NUM[%u] != file_header->magic[%u]", + file_header->length, sizeof(ModelFileHeader), model.model_len, MODEL_FILE_MAGIC_NUM, file_header->magic); Status res = SUCCESS; // Get data address uint8_t *data = reinterpret_cast(model.model_data) + sizeof(ModelFileHeader); if (file_header->is_encrypt == ModelEncryptType::UNENCRYPTED) { // Unencrypted model - GE_CHK_BOOL_RET_STATUS(model.key.empty(), PARAM_INVALID, + GE_CHK_BOOL_RET_STATUS(model.key.empty(), GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION, "Invalid param. model is unencrypted, but key is not empty."); model_data = data; model_len = file_header->length; GELOGI("Model_len is %u, model_file_head_len is %zu.", model_len, sizeof(ModelFileHeader)); } else { - GELOGE(PARAM_INVALID, "Invalid model. ModelEncryptType not supported."); - res = PARAM_INVALID; + GELOGE(GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION, "Invalid model. ModelEncryptType not supported."); + res = GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION; } return res; diff --git a/src/ge/common/model_parser/graph_parser_util.cc b/src/ge/common/model_parser/graph_parser_util.cc new file mode 100644 index 00000000..38eaa60a --- /dev/null +++ b/src/ge/common/model_parser/graph_parser_util.cc @@ -0,0 +1,483 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph_parser_util.h" +#include +#include "common/auth/file_saver.h" +#include "common/convert/pb2json.h" +#include "common/debug/log.h" +#include "common/debug/memory_dumper.h" +#include "common/model_parser/base.h" +#include "common/model_saver.h" +#include "common/properties_manager.h" +#include "common/string_util.h" +#include "common/types.h" +#include "common/util.h" +#include "common/util/error_manager/error_manager.h" +#include "framework/common/debug/ge_log.h" +#include "framework/omg/parser/parser_inner_ctx.h" +#include "graph/compute_graph.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/optimize/common/params.h" +#include "graph/utils/type_utils.h" +#include "omg/omg_inner_types.h" +#include "omg/parser/model_parser.h" +#include "omg/parser/parser_factory.h" +#include "omg/parser/weights_parser.h" +#include "parser/common/pre_checker.h" +#include "proto/ge_ir.pb.h" +#include "register/op_registry.h" +#include "external/register/register_types.h" + +namespace ge { +namespace { +// The function is incomplete. Currently, only l2_optimize, off_optimize is supported. 
+const char *const kInputShapeSample1 = "\"input_name1:n1,c1,h1,w1\""; +const char *const kInputShapeSample2 = "\"input_name1:1,3,224,224\""; +const char *const kSplitError1 = "size not equal to 2 split by \":\""; +const char *const kEmptyError = "can not be empty"; +const char *const kFloatNumError = "exist float number"; +const char *const kDigitError = "is not digit"; + +vector SplitInputShape(const std::string &input_shape) { + vector shape_pair_vec; + size_t pos = input_shape.rfind(":"); + if (pos != std::string::npos) { + shape_pair_vec.emplace_back(input_shape.substr(0, pos)); + shape_pair_vec.emplace_back(input_shape.substr(pos + 1, input_shape.size() - pos)); + } + return shape_pair_vec; +} + +static std::map output_type_str_to_datatype = { + {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}}; + +static bool CheckInputTrueOrFalse(const std::string &s, const std::string &atc_param) { + if ((s == "true") || (s == "false")) { + return true; + } else { + ErrorManager::GetInstance().ATCReportErrMessage("E10033", {"parameter", "value"}, {atc_param, s}); + GELOGE(PARAM_INVALID, "Input parameter[--%s]'s value[%s] must be true or false.", atc_param.c_str(), s.c_str()); + return false; + } +} + +bool CheckDigitStr(std::string &str) { + for (char c : str) { + if (!isdigit(c)) { + GELOGE(domi::FAILED, "value[%s] is not positive integer", str.c_str()); + return false; + } + } + return true; +} + +Status StringToInt(std::string &str, int32_t &value) { + try { + if (!CheckDigitStr(str)) { + GELOGE(PARAM_INVALID, "Invalid of digit string: %s ", str.c_str()); + ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"output_type", str}); + return PARAM_INVALID; + } + value = stoi(str); + } catch (std::invalid_argument &) { + GELOGE(PARAM_INVALID, "Invalid of digit string: %s, catch invalid_argument.", str.c_str()); + ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"output_type", str}); + 
return PARAM_INVALID; + } catch (std::out_of_range &) { + GELOGE(PARAM_INVALID, "Invalid of digit string: %s, catch out_of_range.", str.c_str()); + ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, {"output_type", str}); + return PARAM_INVALID; + } + return SUCCESS; +} + +Status VerifyOutputTypeAndOutNodes(std::vector &out_type_vec) { + std::vector> user_out_nodes = domi::GetContext().user_out_nodes; + std::set out_nodes_info; + for (uint32_t i = 0; i < user_out_nodes.size(); ++i) { + // out_nodes set should include output_type and output_format + std::string tmp = user_out_nodes[i].first + ":" + to_string(user_out_nodes[i].second); + out_nodes_info.emplace(tmp); + } + for (uint32_t i = 0; i < out_type_vec.size(); ++i) { + if (out_nodes_info.find(out_type_vec[i]) == out_nodes_info.end()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10059", {"value"}, {out_type_vec[i]}); + GELOGE(domi::FAILED, "Can not find this node (%s) in out_nodes.", out_type_vec[i].c_str()); + return domi::FAILED; + } + } + return domi::SUCCESS; +} + +Status ParseOutputType(const std::string &output_type, std::map> &out_type_index_map, + std::map> &out_type_dt_map) { + if (output_type.find(':') == std::string::npos) { + GELOGI("output_type is not multiple nodes, means all out nodes"); + auto it = output_type_str_to_datatype.find(output_type); + if (it == output_type_str_to_datatype.end()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10042", {"value"}, {output_type}); + GELOGE(ge::PARAM_INVALID, "Invalid value for --output_type[%s], only support DT_FLOAT, DT_FLOAT16, DT_UINT8!!", + output_type.c_str()); + return domi::FAILED; + } + return domi::SUCCESS; + } + std::vector out_type_vec; + vector nodes_v = StringUtils::Split(output_type, ';'); + for (const string &node : nodes_v) { + vector node_index_type_v = StringUtils::Split(node, ':'); + if (node_index_type_v.size() != 3) { // The size must be 3. 
+ ErrorManager::GetInstance().ATCReportErrMessage("E10058", {"value"}, {node}); + GELOGE(PARAM_INVALID, + "The param of output_type is invalid, the correct format is [opname:index:dtype]," + "while the actual input is %s.", + node.c_str()); + return domi::FAILED; + } + ge::DataType tmp_dt; + std::string node_name = StringUtils::Trim(node_index_type_v[0]); + std::string index_str = StringUtils::Trim(node_index_type_v[1]); + int32_t index; + if (StringToInt(index_str, index) != SUCCESS) { + GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s.", index_str.c_str()); + return domi::FAILED; + } + std::string dt_value = StringUtils::Trim(node_index_type_v[2]); + auto it = output_type_str_to_datatype.find(dt_value); + if (it == output_type_str_to_datatype.end()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10042", {"value"}, {dt_value}); + GELOGE(ge::PARAM_INVALID, "output_type [%s] is invalid.", dt_value.c_str()); + return domi::FAILED; + } else { + tmp_dt = it->second; + } + out_type_vec.push_back(node_name + ":" + index_str); + auto it_index = out_type_index_map.find(node_name); + if (it_index == out_type_index_map.end()) { + vector tmp_vec; + tmp_vec.push_back(index); + out_type_index_map.emplace(node_name, tmp_vec); + } else { + it_index->second.push_back(index); + } + + auto it_dt = out_type_dt_map.find(node_name); + if (it_dt == out_type_dt_map.end()) { + vector tmp_vec; + tmp_vec.push_back(tmp_dt); + out_type_dt_map.emplace(node_name, tmp_vec); + } else { + it_dt->second.push_back(tmp_dt); + } + } + return VerifyOutputTypeAndOutNodes(out_type_vec); +} + +Status CheckOutNode(ge::OpDescPtr op_desc, int32_t index) { + if (op_desc->GetType() == DATA) { + GELOGE(domi::FAILED, "out_nodes [%s] can not be set input data, please check", op_desc->GetName().c_str()); + ErrorManager::GetInstance().ATCReportErrMessage("E10068", {"parameter", "value", "reason"}, + {"out_nodes", op_desc->GetName(), "it can not be set input data"}); + return 
domi::FAILED; + } + + int32_t out_size = op_desc->GetOutputsSize(); + if (index < 0 || index >= out_size) { + GELOGE(domi::FAILED, + "out_node [%s] output index:%d must be smaller " + "than node output size:%d and can not be negative!", + op_desc->GetName().c_str(), index, out_size); + std::string fail_reason = "output index:" + to_string(index) + + " must be smaller than output size:" + to_string(out_size) + " and can not be negative!"; + ErrorManager::GetInstance().ATCReportErrMessage("E10060", {"parameter", "value", "reason"}, + {"out_nodes", op_desc->GetName(), fail_reason}); + return domi::FAILED; + } + return domi::SUCCESS; +} + +Status GetOutputLeaf(NodePtr node, std::vector> &output_nodes_info, + std::vector &output_nodes_name) { + ge::OpDescPtr tmpDescPtr = node->GetOpDesc(); + if (tmpDescPtr == nullptr) { + GELOGE(domi::FAILED, "Get outnode op desc fail."); + return domi::FAILED; + } + size_t size = tmpDescPtr->GetOutputsSize(); + if (node->GetType() != NETOUTPUT) { + for (size_t index = 0; index < size; ++index) { + output_nodes_info.push_back(std::make_pair(node, index)); + output_nodes_name.push_back(node->GetName()); + } + } else { + const auto in_anchors = node->GetAllInDataAnchors(); + for (auto in_anchor : in_anchors) { + auto out_anchor = in_anchor->GetPeerOutAnchor(); + if (out_anchor == nullptr) { + GELOGE(domi::FAILED, "Get leaf node op desc fail."); + return domi::FAILED; + } + auto out_node = out_anchor->GetOwnerNode(); + output_nodes_info.push_back(std::make_pair(out_node, out_anchor->GetIdx())); + output_nodes_name.push_back(out_node->GetName()); + } + } + return SUCCESS; +} +} // namespace + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOutputFp16NodesFormat(const string &is_output_fp16) { + if (is_output_fp16.empty()) { + return SUCCESS; + } + + vector &output_formats = domi::GetContext().output_formats; + output_formats.clear(); + vector node_format_vec = StringUtils::Split(is_output_fp16, ','); + for (auto &is_fp16 : 
node_format_vec) { + StringUtils::Trim(is_fp16); + if (!CheckInputTrueOrFalse(is_fp16, "is_output_adjust_hw_layout")) { + GELOGE(PARAM_INVALID, "Invalid Param, is_output_adjust_hw_layout only support true/false: but is [%s]", + is_output_fp16.c_str()); + return PARAM_INVALID; + } + if (is_fp16 == "false") { + output_formats.push_back(DOMI_TENSOR_ND); + } else if (is_fp16 == "true") { + output_formats.push_back(domi::DOMI_TENSOR_NC1HWC0); + } + } + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SetOutputNodeInfo(ge::Graph &graph, + const std::string &output_type, + const std::string &output) { + ge::ComputeGraphPtr compute_graph = ge::GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(compute_graph); + + std::vector> user_out_nodes = domi::GetContext().user_out_nodes; + std::vector output_formats = domi::GetContext().output_formats; + std::vector> output_nodes_info; + std::vector output_nodes_name; + + std::map> out_type_index_map; + std::map> out_type_dt_map; + if (!output_type.empty()) { + if (ParseOutputType(output_type, out_type_index_map, out_type_dt_map) != SUCCESS) { + GELOGE(domi::FAILED, "Parse output_type failed."); + return domi::FAILED; + } + } + + // User declared outputs + for (uint32_t i = 0; i < user_out_nodes.size(); ++i) { + ge::NodePtr out_node = compute_graph->FindNode(user_out_nodes[i].first); + if (out_node == nullptr) { + GELOGE(domi::FAILED, "Can not find src node (%s) in graph.", user_out_nodes[i].first.c_str()); + return domi::FAILED; + } + auto op_desc = out_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + if (CheckOutNode(op_desc, user_out_nodes[i].second) != SUCCESS) { + GELOGE(domi::FAILED, "Check out node (%s) fail.", user_out_nodes[i].first.c_str()); + return domi::FAILED; + } + if (i < output_formats.size()) { + if (output_formats[i] == domi::DOMI_TENSOR_NC1HWC0) { + GELOGI("The output node [%s] should be set NC1HWC0", user_out_nodes[i].first.c_str()); + if (!ge::AttrUtils::SetBool(op_desc, 
"output_set_fp16_nc1hwc0", true)) { + GELOGW("The output node [%s] set NC1HWC0 failed", user_out_nodes[i].first.c_str()); + } + } + } + auto it_index = out_type_index_map.find(user_out_nodes[i].first); + auto it_dt = out_type_dt_map.find(user_out_nodes[i].first); + if ((it_index != out_type_index_map.end()) && (it_dt != out_type_dt_map.end())) { + GELOGI("The output node [%s] need to be set output_type", user_out_nodes[i].first.c_str()); + (void)ge::AttrUtils::SetListDataType(op_desc, "_output_dt_list", it_dt->second); + (void)ge::AttrUtils::SetListInt(op_desc, "_output_dt_index", it_index->second); + } + output_nodes_info.push_back(std::make_pair(out_node, user_out_nodes[i].second)); + output_nodes_name.push_back(out_node->GetName()); + } + // default output node (leaf) + if (user_out_nodes.empty()) { + for (ge::NodePtr node : compute_graph->GetDirectNode()) { + if (!node->GetInDataNodes().empty() && node->GetOutDataNodes().empty()) { + Status ret = GetOutputLeaf(node, output_nodes_info, output_nodes_name); + GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "find leaf fail."); + } + } + } + compute_graph->SetGraphOutNodesInfo(output_nodes_info); + domi::GetContext().net_out_nodes = output_nodes_name; + return domi::SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ParseInputShape( + const string &input_shape, unordered_map> &shape_map, + vector>> &user_shape_map, bool is_dynamic_input) { + vector shape_vec = StringUtils::Split(input_shape, ';'); + const int DEFAULT_SHAPE_PAIR_SIZE = 2; + for (const auto &shape : shape_vec) { + vector shape_pair_vec = SplitInputShape(shape); + if (shape_pair_vec.size() != DEFAULT_SHAPE_PAIR_SIZE) { + ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"}, + {shape, kSplitError1, kInputShapeSample1}); + GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.", + shape.c_str(), kSplitError1, kInputShapeSample1); + return false; + } + if 
(shape_pair_vec[1].empty()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"}, + {shape, kEmptyError, kInputShapeSample1}); + GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.", + shape.c_str(), kEmptyError, kInputShapeSample1); + return false; + } + + vector shape_value_strs = StringUtils::Split(shape_pair_vec[1], ','); + vector shape_values; + for (auto &shape_value_str : shape_value_strs) { + // stoul: The method may throw an exception: invalid_argument/out_of_range + if (std::string::npos != shape_value_str.find('.')) { + ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"}, + {shape, kFloatNumError, kInputShapeSample2}); + GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.", + shape.c_str(), kFloatNumError, kInputShapeSample2); + return false; + } + + long left_result = 0; + try { + left_result = stol(StringUtils::Trim(shape_value_str)); + if (!shape_value_str.empty() && (shape_value_str.front() == '-')) { + // The value maybe dynamic shape [-1], need substr it and verify isdigit. 
+ shape_value_str = shape_value_str.substr(1); + } + for (char c : shape_value_str) { + if (!isdigit(c)) { + ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"}, + {shape, kDigitError, kInputShapeSample2}); + GELOGE(PARAM_INVALID, "--input_shape's shape value[%s] is not digit", shape_value_str.c_str()); + return false; + } + } + } catch (const std::out_of_range &) { + ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, + {"input_shape", shape_value_str}); + GELOGW("Input parameter[--input_shape]’s value[%s] cause out of range execption!", shape_value_str.c_str()); + return false; + } catch (const std::invalid_argument &) { + ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, + {"input_shape", shape_value_str}); + GELOGW("Input parameter[--input_shape]’s value[%s] cause invalid argument!", shape_value_str.c_str()); + return false; + } catch (...) { + ErrorManager::GetInstance().ATCReportErrMessage("E10015", {"parameter", "value"}, + {"input_shape", shape_value_str}); + GELOGW("Input parameter[--input_shape]’s value[%s] cause unkown execption!", shape_value_str.c_str()); + return false; + } + int64_t result = left_result; + // - 1 is not currently supported + if (!is_dynamic_input && result <= 0) { + ErrorManager::GetInstance().ATCReportErrMessage("E10011", {"shape", "result"}, {shape, std::to_string(result)}); + GELOGW( + "Input parameter[--input_shape]’s shape value[%s] is invalid, " + "expect positive integer, but value is %ld.", + shape.c_str(), result); + return false; + } + shape_values.push_back(result); + } + + shape_map.emplace(make_pair(StringUtils::Trim(shape_pair_vec[0]), shape_values)); + user_shape_map.push_back(make_pair(StringUtils::Trim(shape_pair_vec[0]), shape_values)); + } + + return true; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOutputNodes(const string &out_nodes) { + try { + // parse output node + if (!out_nodes.empty()) { 
+ domi::GetContext().out_nodes_map.clear(); + domi::GetContext().user_out_nodes.clear(); + + vector nodes_v = StringUtils::Split(out_nodes, ';'); + for (const string &node : nodes_v) { + vector key_value_v = StringUtils::Split(node, ':'); + if (key_value_v.size() != 2) { // The size must be 2. + ErrorManager::GetInstance().ATCReportErrMessage( + "E10068", {"parameter", "value", "reason"}, + {"out_nodes", node, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""}); + GELOGE(PARAM_INVALID, + "The input format of --out_nodes is invalid, the correct format is " + "\"node_name1:0;node_name1:1;node_name2:0\", while the actual input is %s.", + node.c_str()); + return PARAM_INVALID; + } + auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]); + // stoi: The method may throw an exception: invalid_argument/out_of_range + if (!CheckDigitStr(key_value_v[1])) { + ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"out_nodes", out_nodes}); + GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s", out_nodes.c_str()); + return PARAM_INVALID; + } + int32_t index = stoi(StringUtils::Trim(key_value_v[1])); + if (iter != domi::GetContext().out_nodes_map.end()) { + iter->second.emplace_back(index); + } else { + std::vector index_v; + index_v.emplace_back(index); + domi::GetContext().out_nodes_map.emplace(key_value_v[0], index_v); + } + domi::GetContext().user_out_nodes.push_back(std::make_pair(key_value_v[0], index)); + } + } + } catch (std::invalid_argument &) { + GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", out_nodes.c_str()); + ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"out_nodes", out_nodes}); + return PARAM_INVALID; + } catch (std::out_of_range &) { + GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", out_nodes.c_str()); + ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, {"out_nodes", out_nodes}); + return 
PARAM_INVALID; + } + + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOpConf(const char *op_conf) { + if (op_conf != nullptr && *op_conf != '\0') { + // divided by ":" + PropertiesManager::Instance().SetPropertyDelimiter(OP_CONF_DELIMITER); + // Parsing the op_conf configuration item file + if (!PropertiesManager::Instance().Init(op_conf)) { + GELOGE(FAILED, "op_name_map init failed!"); + return FAILED; + } + // Return map and put it into ATC global variable + domi::GetContext().op_conf_map = PropertiesManager::Instance().GetPropertyMap(); + } + return SUCCESS; +} +} // namespace ge diff --git a/src/ge/common/model_parser/graph_parser_util.h b/src/ge/common/model_parser/graph_parser_util.h new file mode 100644 index 00000000..76ac07fd --- /dev/null +++ b/src/ge/common/model_parser/graph_parser_util.h @@ -0,0 +1,68 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_COMMON_GRAPH_PARSER_UTIL_H_ +#define GE_COMMON_GRAPH_PARSER_UTIL_H_ + +#include +#include +#include +#include +#include "framework/common/types.h" +#include "framework/omg/omg_inner_types.h" +#include "framework/omg/parser/parser_inner_ctx.h" +#include "proto/ge_ir.pb.h" +#include "proto/om.pb.h" + +#include "graph/compute_graph.h" +#include "graph/graph.h" +#include "graph/model.h" +#include "runtime/kernel.h" + +using domi::Status; +using std::pair; +using std::string; +using std::unordered_map; +using std::vector; + +namespace ge { +/** + * @ingroup domi_omg + * @brief init omg context + * @return void + */ +Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); + +Status ParseOutputFp16NodesFormat(const string &is_output_fp16); + +Status ParseOutputNodes(const string &out_nodes); + +bool ParseInputShape(const string &input_shape, unordered_map> &shape_map, + vector>> &user_shape_map, bool is_dynamic_input); + +Status ParseOpConf(const char *op_conf); +} // namespace ge + +namespace domi { +/** + * @ingroup domi_omg + * @brief get omg context + * @return reference of OmgContext + */ +ge::OmgContext &GetContext(); +} // namespace domi + +#endif // GE_COMMON_GRAPH_PARSER_UTIL_H_ diff --git a/src/ge/common/profiling/profiling_manager.cc b/src/ge/common/profiling/profiling_manager.cc index 04d23546..364f8298 100644 --- a/src/ge/common/profiling/profiling_manager.cc +++ b/src/ge/common/profiling/profiling_manager.cc @@ -16,15 +16,12 @@ #include "common/profiling/profiling_manager.h" -#include #include "framework/common/debug/ge_log.h" #include "framework/common/debug/log.h" #include "framework/common/string_util.h" #include "graph/ge_context.h" #include "runtime/base.h" -using Json = nlohmann::json; - namespace { const char *const kJobID = "jobID"; const char *const kDeviceID = "deviceID"; @@ -35,6 +32,7 @@ const char *const kEvents = "events"; const char *const kAiCoreEvents = 
"ai_core_events"; const char *const kName = "name"; const char *const kTraceID = "traceId"; +const char *const kProfDir = "resultPath"; const size_t kReportMaxLen = 2048; } // namespace @@ -100,6 +98,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In Json start_prof_conf = Json::parse(config); Json &prof_conf = start_prof_conf[kStartCfg][0]; job_id_ = prof_conf[kJobID]; + auto iter = prof_conf.find(kProfDir); + if (iter != prof_conf.end()) { + prof_dir_ = prof_conf[kProfDir]; + } Json &device_id = prof_conf[kDeviceID]; if (device_id.size() != 0) { vector().swap(device_id_); @@ -126,23 +128,36 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In } } - GELOGI("Profiling json config from acl:%s", config.c_str()); Json &features = prof_conf[kFeatures]; + if (ParseFeaturesFromAclCfg(features) != SUCCESS) { + GELOGE(FAILED, "Parse feature from acl cfg failed."); + return FAILED; + } + is_profiling_ = true; + } catch (...) { + GELOGE(FAILED, "Json conf is not invalid !"); + return ge::PARAM_INVALID; + } +#endif + return ge::SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::ParseFeaturesFromAclCfg( + const Json &features) { +#ifdef DAVINCI_SUPPORT_PROFILING + try { for (size_t i = 0; i < features.size(); ++i) { - Json &feature = features[i]; + const Json &feature = features[i]; if ((feature.find(kName) == feature.end()) || feature[kName].is_null()) { continue; } - const std::string &name = feature[kName]; if (name == "op_trace") { - GELOGI("Op trace config from acl"); - Json &conf = feature[kConf]; - Json &events = conf[0][kEvents]; + const Json &conf = feature[kConf]; + const Json &events = conf[0][kEvents]; const std::string &ai_core_events = events[0][kAiCoreEvents]; GELOGI("Op trace config from acl ai_core_events:%s", ai_core_events.c_str()); is_op_trace_ = true; - // op trace get conf ProfMgrConf prof_mgr_conf; int result = ProfMgrGetConf(ai_core_events, 
&prof_mgr_conf); if (result != 0) { @@ -154,10 +169,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In GELOGI("Op trace profiling iter num %d,", op_trace_iter_num_); } else if (name == "task_trace") { is_op_trace_ = false; + if (feature.find(kConf) != feature.end()) { + const Json &conf = feature[kConf]; + std::stringstream task_trace_conf; + task_trace_conf << conf; + task_trace_conf_ = task_trace_conf.str(); + } GELOGI("Task trace config from acl"); } else if (name == "system_trace") { is_op_trace_ = false; - Json &conf = feature[kConf]; + const Json &conf = feature[kConf]; std::stringstream system_trace_conf; system_trace_conf << conf; system_trace_conf_ = system_trace_conf.str(); @@ -165,10 +186,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In } profiling_opts_.push_back(name); } - - is_profiling_ = true; } catch (...) { - GELOGE(FAILED, "Json conf is not invalid !"); + GELOGE(ge::PARAM_INVALID, "Json conf feature is not invalid !"); return ge::PARAM_INVALID; } #endif @@ -235,6 +254,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St p_device[kDeviceID] = std::to_string(device_id); p_device[kJobID] = job_id_; p_device[kTraceID] = std::to_string(GetContext().TraceId()); + if (!prof_dir_.empty()) { + p_device[kProfDir] = prof_dir_; + GELOGI("Prof dir: %s.", prof_dir_.c_str()); + } Json features; if (is_op_trace_) { @@ -258,6 +281,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St Json f; if (profiling_opts_[i] == "system_trace") { f[kConf] = nlohmann::json::parse(system_trace_conf_); + } else if (profiling_opts_[i] == "task_trace") { + if (!task_trace_conf_.empty()) { + f[kConf] = nlohmann::json::parse(task_trace_conf_); + } } f[kName] = profiling_opts_[i]; features[i] = f; diff --git a/src/ge/common/profiling/profiling_manager.h b/src/ge/common/profiling/profiling_manager.h index 2dc0b407..26ee84ca 100644 --- 
a/src/ge/common/profiling/profiling_manager.h +++ b/src/ge/common/profiling/profiling_manager.h @@ -17,6 +17,7 @@ #ifndef GE_COMMON_PROFILING_PROFILING_MANAGER_H_ #define GE_COMMON_PROFILING_PROFILING_MANAGER_H_ +#include #include #include #include @@ -30,6 +31,7 @@ using std::map; using std::string; using std::vector; +using Json = nlohmann::json; namespace ge { const std::string GE_PROFILING_MODULE = "Framework"; @@ -84,11 +86,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { void PluginUnInit(const std::string &module) const; private: + ge::Status ParseFeaturesFromAclCfg(const Json &feature); bool is_profiling_ = false; bool is_op_trace_ = false; bool is_load_ = false; int32_t op_trace_iter_num_ = 0; string job_id_; + string prof_dir_; vector device_id_; vector op_trace_conf_; vector profiling_opts_; @@ -96,6 +100,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { string recv_profiling_config_; string send_profiling_config_; string system_trace_conf_; + string task_trace_conf_; const ProfilingEngineImpl engine_; }; } // namespace ge diff --git a/src/ge/common/types.cc b/src/ge/common/types.cc index 80dea8a0..2de75ff6 100644 --- a/src/ge/common/types.cc +++ b/src/ge/common/types.cc @@ -292,6 +292,7 @@ REGISTER_OPTYPE_DEFINE(BASICLSTMCELL, "BasicLSTMCell"); REGISTER_OPTYPE_DEFINE(GETNEXT, "GetNext"); REGISTER_OPTYPE_DEFINE(INITDATA, "InitData"); REGISTER_OPTYPE_DEFINE(REFIDENTITY, "RefIdentity"); +REGISTER_OPTYPE_DEFINE(BITCAST, "Bitcast"); /***************Ann special operator*************************/ REGISTER_OPTYPE_DEFINE(ANN_MEAN, "AnnMean"); @@ -382,6 +383,8 @@ REGISTER_OPTYPE_DEFINE(HCOMALLREDUCE, "HcomAllReduce"); REGISTER_OPTYPE_DEFINE(HCOMREDUCESCATTER, "HcomReduceScatter"); REGISTER_OPTYPE_DEFINE(HCOMSEND, "HcomSend"); REGISTER_OPTYPE_DEFINE(HCOMRECEIVE, "HcomReceive"); +REGISTER_OPTYPE_DEFINE(HCOMREMOTEREAD, "HcomRemoteRead"); +REGISTER_OPTYPE_DEFINE(HCOMREMOTEWRITE, "HcomRemoteWrite"); 
REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign"); REGISTER_OPTYPE_DEFINE(VARISINITIALIZEDOP, "VarIsInitializedOp"); diff --git a/src/ge/common/util.cc b/src/ge/common/util.cc index 69dc7442..55f0d330 100644 --- a/src/ge/common/util.cc +++ b/src/ge/common/util.cc @@ -363,7 +363,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const std::map args_map; if (file_path.empty()) { ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {atc_param}); - GELOGW("Input parameter's value is empty."); + GELOGW("Input parameter %s is empty.", file_path.c_str()); return false; } std::string real_path = RealPath(file_path.c_str()); diff --git a/src/ge/engine_manager/dnnengine_manager.cc b/src/ge/engine_manager/dnnengine_manager.cc index 9afb207f..ad36ebb5 100644 --- a/src/ge/engine_manager/dnnengine_manager.cc +++ b/src/ge/engine_manager/dnnengine_manager.cc @@ -181,13 +181,12 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { GELOGI("DNNEngineManager: Can not get op info by op type %s", op_desc->GetType().c_str()); return ""; } - string ge_core_type; + std::string ge_core_type; Status ret = ge::GetContext().GetOption(ge::CORE_TYPE, ge_core_type); - if (ret != SUCCESS) { - GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE"); - } - string exclude_core_Type = (ge_core_type == kVectorCore) ? kAIcoreEngine : kVectorEngine; + GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE")); + std::string exclude_core_Type = (ge_core_type == kVectorCore) ? 
kAIcoreEngine : kVectorEngine; GELOGD("engine type will exclude: %s", exclude_core_Type.c_str()); + std::map unsupported_reasons; for (const auto &it : op_infos) { if (it.engine == exclude_core_Type) { @@ -204,7 +203,7 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { checksupport_cost_[kernel_name] += GetCurrentTimestap() - start_time; op_desc->SetOpEngineName(it.engine); op_desc->SetOpKernelLibName(kernel_name); - GELOGD("DNNEngineManager:Set OpKernelLibName %s and engine name %s into op_desc %s", kernel_name.c_str(), + GELOGD("DNNEngineManager:Set OpKernelLibName %s and engine name %s to op_desc %s", kernel_name.c_str(), it.engine.c_str(), op_desc->GetName().c_str()); return it.engine; } else { @@ -222,6 +221,9 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { unsupported_reasons.emplace(kernel_name, unsupported_reason); GELOGI("DNNEngineManager:Check support failed, kernel_name is %s, op type is %s, op name is %s", kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); + if (!op_desc->HasAttr("_is_ge_op")) { + ErrorManager::GetInstance().ATCReportErrMessage("W11001", {"opname"}, {op_desc->GetName()}); + } } } else { GELOGW( @@ -371,7 +373,7 @@ Status DNNEngineManager::ParserEngineMessage(const json engines_json, const std: } Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle handle) { - GELOGI("Begin to read json file"); + GELOGD("Begin to read json file"); if (file_path.empty()) { GELOGE(FAILED, "Json path %s is not valid", file_path.c_str()); return FAILED; @@ -406,12 +408,12 @@ Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle h return FAILED; } ifs.close(); - GELOGI("Read json file success"); + GELOGD("Read json file success"); return SUCCESS; } Status DNNEngineManager::CheckJsonFile() { - GELOGI("Begin to check json file"); + GELOGD("Begin to check json file"); for (auto &it : engines_map_) { std::string engine_name = 
it.first; int count = 0; @@ -431,7 +433,7 @@ Status DNNEngineManager::CheckJsonFile() { return FAILED; } } - GELOGI("Check json file success"); + GELOGD("Check json file success"); return SUCCESS; } } // namespace ge diff --git a/src/ge/executor/CMakeLists.txt b/src/ge/executor/CMakeLists.txt index 0cdb00e2..1915cd38 100755 --- a/src/ge/executor/CMakeLists.txt +++ b/src/ge/executor/CMakeLists.txt @@ -60,6 +60,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "../graph/load/new_model_manager/task_info/task_info.cc" "../graph/load/new_model_manager/tbe_handle_store.cc" "../graph/load/new_model_manager/zero_copy_task.cc" + "../graph/load/new_model_manager/zero_copy_offset.cc" "../graph/manager/graph_caching_allocator.cc" "../graph/manager/graph_manager_utils.cc" "../graph/manager/graph_mem_allocator.cc" diff --git a/src/ge/executor/ge_executor.cc b/src/ge/executor/ge_executor.cc index 098c57b6..ee65faec 100644 --- a/src/ge/executor/ge_executor.cc +++ b/src/ge/executor/ge_executor.cc @@ -36,6 +36,9 @@ #include "mmpa/mmpa_api.h" #include "single_op/single_op_manager.h" +using std::string; +using std::vector; + namespace { const size_t kDynamicBatchSizeVecSize = 1; const size_t kStaticBatchInfoSize = 1; @@ -102,20 +105,36 @@ void SetDynamicInputDataFlag(const ge::RunModelData &input_data, const std::vect ge::InputData &inputs) { inputs.is_dynamic_batch = true; std::string batch_label; + size_t match_idx = 0; for (size_t i = 0; i < batch_info.size(); ++i) { - if (batch_info[i].size() == kDynamicBatchSizeVecSize && - batch_info[i][0] == static_cast(input_data.dynamic_batch_size)) { - batch_label = kBatchLabel + std::to_string(i); - inputs.batch_label = batch_label; + // dynamic_dims + if (input_data.dynamic_dims.size() != 0) { + bool is_match = true; + for (size_t j = 0; j < static_cast(input_data.dynamic_dims.size()); ++j) { + if (static_cast(batch_info[i][j]) != input_data.dynamic_dims[j]) { + is_match = false; + break; + } + } + if (is_match) { + match_idx = i; 
+ break; + } + // dynamic_batch_size + } else if (batch_info[i].size() == kDynamicBatchSizeVecSize && + batch_info[i][0] == static_cast(input_data.dynamic_batch_size)) { + match_idx = i; break; + // dynamic_image_size } else if (batch_info[i].size() == kDynamicImageSizeVecSize && batch_info[i][0] == static_cast(input_data.dynamic_image_height) && batch_info[i][1] == static_cast(input_data.dynamic_image_width)) { - batch_label = kBatchLabel + std::to_string(i); - inputs.batch_label = batch_label; + match_idx = i; break; } } + batch_label = kBatchLabel + std::to_string(match_idx); + inputs.batch_label = batch_label; GELOGI("current batch label:%s", batch_label.c_str()); } @@ -225,39 +244,41 @@ Status GeExecutor::Finalize() { Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t batch_size) { if (dynamic_input_addr == nullptr) { - GELOGE(FAILED, "Dynamic input addr is nullptr!"); - return FAILED; + GELOGE(PARAM_INVALID, "Dynamic input addr is nullptr!"); + return PARAM_INVALID; } uint64_t size = sizeof(uint64_t); if (length < size) { - GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, size); - return FAILED; + GELOGE(PARAM_INVALID, "Dynamic input size [%lu] is less than [%lu]!", length, size); + return PARAM_INVALID; } // Verify whether the input dynamic batch matches the model gear std::vector> batch_info; std::vector batch_num{batch_size}; - Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); + int32_t dynamic_type = static_cast(FIXED); + Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); if (ret != SUCCESS) { - GELOGE(FAILED, "Get dynamic input info failed."); - return FAILED; + GELOGE(ret, "Get dynamic input info failed."); + return ret; } if (!IsDynamicBatchSizeMatchModel(batch_size, batch_info)) { - GELOGE(FAILED, "The current dynamic input does not match the gear of the model."); - return FAILED; + GELOGE(PARAM_INVALID, "The current 
dynamic input does not match the gear of the model."); + return PARAM_INVALID; } - ret = GraphExecutor::SetDynamicSize(model_id, batch_num); + ret = GraphExecutor::SetDynamicSize(model_id, batch_num, static_cast(DYNAMIC_BATCH)); if (ret != SUCCESS) { - GELOGE(FAILED, "Set dynamic size failed"); - return FAILED; + GELOGE(ret, "Set dynamic size failed"); + return ret; } // memcpy dynamic_batch_size from host to device - if (rtMemcpy(dynamic_input_addr, length, &batch_size, size, RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { - GELOGE(FAILED, "memcpy dynamic batch input data failed!"); - return FAILED; + rtError_t rt_ret = rtMemcpy(dynamic_input_addr, length, &batch_size, size, RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "memcpy dynamic batch input data failed! ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } return SUCCESS; } @@ -265,40 +286,42 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height, uint64_t image_width) { if (dynamic_input_addr == nullptr) { - GELOGE(FAILED, "Dynamic input addr is nullptr!"); - return FAILED; + GELOGE(PARAM_INVALID, "Dynamic input addr is nullptr!"); + return PARAM_INVALID; } uint64_t dynamic_input_size = kDynamicImageSizeInputSize * sizeof(uint64_t); if (length < dynamic_input_size) { - GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); - return FAILED; + GELOGE(PARAM_INVALID, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); + return PARAM_INVALID; } // Verify whether the input dynamic resolution matches the model gear std::vector> batch_info; std::vector batch_num{image_height, image_width}; - Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); + int32_t dynamic_type = static_cast(FIXED); + Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, 
batch_info, dynamic_type); if (ret != SUCCESS) { - GELOGE(FAILED, "Get dynamic input info failed."); - return FAILED; + GELOGE(ret, "Get dynamic input info failed."); + return ret; } if (!IsDynamicImageSizeMatchModel(image_height, image_width, batch_info)) { - GELOGE(FAILED, "The current dynamic input does not match the gear of the model."); - return FAILED; + GELOGE(PARAM_INVALID, "The current dynamic input does not match the gear of the model."); + return PARAM_INVALID; } - ret = GraphExecutor::SetDynamicSize(model_id, batch_num); + ret = GraphExecutor::SetDynamicSize(model_id, batch_num, static_cast(DYNAMIC_IMAGE)); if (ret != SUCCESS) { - GELOGE(FAILED, "Set dynamic size failed"); - return FAILED; + GELOGE(ret, "Set dynamic size failed"); + return ret; } // Memcpy dynamic resolution height from host to device - if (rtMemcpy(dynamic_input_addr, sizeof(uint64_t), &image_height, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE) != - RT_ERROR_NONE) { - GELOGE(FAILED, "memcpy dynamic resolution input data failed!"); - return FAILED; + rtError_t rt_ret = + rtMemcpy(dynamic_input_addr, sizeof(uint64_t), &image_height, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "memcpy dynamic resolution input data failed! 
ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } uint64_t remain_size = length - sizeof(uint64_t); @@ -311,16 +334,109 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad return SUCCESS; } -Status GeExecutor::GetCurShape(const uint32_t model_id, std::vector &batch_info) { +Status GeExecutor::SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length, + const vector &dynamic_dims) { + if (dynamic_input_addr == nullptr) { + GELOGE(FAILED, "Dynamic input addr is nullptr!"); + return FAILED; + } + + Status ret = GraphExecutor::SetDynamicSize(model_id, dynamic_dims, static_cast(DYNAMIC_DIMS)); + if (ret != SUCCESS) { + GELOGE(FAILED, "Set dynamic size failed"); + return FAILED; + } + + vector cur_dynamic_dims; + if (GetCurDynamicDims(model_id, dynamic_dims, cur_dynamic_dims) != SUCCESS) { + GELOGE(FAILED, "GetCurDynamicDims failed."); + return FAILED; + } + + size_t dynamic_dim_num = cur_dynamic_dims.size(); + uint64_t dynamic_input_size = static_cast(dynamic_dim_num * sizeof(uint64_t)); + if (length < dynamic_input_size) { + GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); + return FAILED; + } + + for (uint32_t i = 0; i < dynamic_dim_num; ++i) { + // Memcpy dynamic dim[i] from host to device + if (rtMemcpy(reinterpret_cast(reinterpret_cast(dynamic_input_addr) + sizeof(uint64_t) * i), + length - sizeof(uint64_t) * i, &cur_dynamic_dims[i], sizeof(uint64_t), + RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { + GELOGE(FAILED, "memcpy dynamic resolution input data failed!"); + return FAILED; + } + } + return SUCCESS; +} + +Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector &combined_dims, + vector &cur_dynamic_dims) { + vector> combined_batch; + if (GraphExecutor::GetCombinedDynamicDims(model_id, combined_batch) != SUCCESS) { + GELOGE(FAILED, "Get combined dynamic dims info failed."); + return FAILED; + } + if (combined_batch.empty()) { + GELOGE(FAILED, 
"Combined dynamic dims is empty."); + return FAILED; + } + + if (combined_dims.size() != combined_batch[0].size()) { + GELOGE(FAILED, "Input dynamic dims's dimension size[%zu] is different from model[%zu].", combined_dims.size(), + combined_batch[0].size()); + return FAILED; + } + bool matched = false; + size_t idx = 0; + for (size_t i = 0; i < combined_batch.size(); i++) { + bool is_match = true; + for (size_t j = 0; j < combined_dims.size(); j++) { + if (combined_dims[j] != static_cast(combined_batch[i][j])) { + is_match = false; + break; + } + } + if (is_match) { + idx = i; + matched = true; + break; + } + } + + if (!matched) { + GELOGE(FAILED, "Input dynamic dims can not match model."); + return FAILED; + } + + // batch_info save the dynamic info of combined_dims + vector> batch_info; + int32_t dynamic_type = static_cast(FIXED); + if (GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type) != SUCCESS) { + GELOGE(FAILED, "Get dynamic input info failed."); + return FAILED; + } + + cur_dynamic_dims.clear(); + for (size_t i = 0; i < batch_info[idx].size(); i++) { + cur_dynamic_dims.emplace_back(static_cast(batch_info[idx][i])); + } + + return SUCCESS; +} + +Status GeExecutor::GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type) { GELOGI("Begin to get current shape"); if (!isInit_) { GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); return GE_EXEC_NOT_INIT; } - Status ret = GraphExecutor::GetCurShape(model_id, batch_info); + Status ret = GraphExecutor::GetCurShape(model_id, batch_info, dynamic_type); if (ret != SUCCESS) { - GELOGE(FAILED, "Get current shape failed"); - return FAILED; + GELOGE(ret, "Get current shape failed"); + return ret; } return SUCCESS; } @@ -330,12 +446,12 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add const kAippDynamicPara &aippParms) { GELOGI("Enter to SetDynamicAippData."); if (dynamic_input_addr == nullptr) { - GELOGE(FAILED, "Dynamic aipp 
input addr is nullptr!"); - return FAILED; + GELOGE(PARAM_INVALID, "Dynamic aipp input addr is nullptr!"); + return PARAM_INVALID; } if (aippBatchPara.empty()) { - GELOGE(FAILED, "aippBatchPara is empty."); - return FAILED; + GELOGE(PARAM_INVALID, "aippBatchPara is empty."); + return PARAM_INVALID; } uint64_t batch_num = aippBatchPara.size(); uint64_t real_aippParms_size = sizeof(kAippDynamicPara) - sizeof(kAippDynamicBatchPara); @@ -345,24 +461,25 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add "batch num is %lu, struct_len is %lu", model_id, length, batch_num, struct_len); if (struct_len > length) { - GELOGE(FAILED, "input dynamic aipp param len [%lu] is larger than aipp_data size [%lu]", struct_len, length); - return FAILED; + GELOGE(PARAM_INVALID, "input dynamic aipp param len [%lu] is larger than aipp_data size [%lu]", struct_len, length); + return PARAM_INVALID; } // Memcpy real kAippDynamicBatchPara from host to device - if (rtMemcpy(dynamic_input_addr, length, &aippParms, real_aippParms_size, RT_MEMCPY_HOST_TO_DEVICE) != - RT_ERROR_NONE) { - GELOGE(FAILED, "memcpy real_aippParms_size failed!"); - return FAILED; + rtError_t rt_ret = rtMemcpy(dynamic_input_addr, length, &aippParms, real_aippParms_size, RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "memcpy real_aippParms_size failed! 
ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } uint64_t remain_len = length - real_aippParms_size; uint8_t *aipp_batch_para_dev = reinterpret_cast(dynamic_input_addr) + real_aippParms_size; for (uint64_t i = 0; i < batch_num; ++i) { - if (rtMemcpy(reinterpret_cast(aipp_batch_para_dev + i * sizeof(kAippDynamicBatchPara)), - (remain_len - i * sizeof(kAippDynamicBatchPara)), &(aippBatchPara[i]), sizeof(kAippDynamicBatchPara), - RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { - GELOGE(FAILED, "memcpy kAippDynamicBatchPara input data failed!"); - return FAILED; + rt_ret = rtMemcpy(reinterpret_cast(aipp_batch_para_dev + i * sizeof(kAippDynamicBatchPara)), + (remain_len - i * sizeof(kAippDynamicBatchPara)), &(aippBatchPara[i]), + sizeof(kAippDynamicBatchPara), RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "memcpy kAippDynamicBatchPara input data failed! ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } } return SUCCESS; @@ -429,7 +546,7 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { } Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id); if (ret != SUCCESS) { - GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed."); + GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. 
model id: %u", model_id); return FAILED; } return GraphLoader::UnloadModel(model_id); @@ -468,17 +585,19 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector> &batch_info) { +Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector> &batch_info, + int32_t &dynamic_type) { GELOGI("Begin to get dynamic batch info."); if (!isInit_) { GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); return GE_EXEC_NOT_INIT; } - Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); + Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); if (ret != SUCCESS) { GELOGE(ret, "GetDynamicBatchInfo failed."); return ret; @@ -515,6 +636,30 @@ Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector> &batch_info) { + GELOGI("Begin to get combined dynamic dims info."); + if (!isInit_) { + GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); + return GE_EXEC_NOT_INIT; + } + + Status ret = GraphExecutor::GetCombinedDynamicDims(model_id, batch_info); + if (ret != SUCCESS) { + GELOGE(ret, "GetCombinedDynamicDims failed."); + return ret; + } + + GELOGI("Get combined dynamic dims succ."); + return SUCCESS; +} + +/// +/// @ingroup ge /// @brief Get AIPP input format /// @param [in] model_id /// @param [in] index @@ -628,8 +773,8 @@ Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_da string filePath = RealPath(path.c_str()); if (filePath.empty()) { - GELOGE(ge::FAILED, "File path is invalid. please check your text file '%s'.", path.c_str()); - return ge::FAILED; + GELOGE(GE_EXEC_MODEL_PATH_INVALID, "File path is invalid. 
please check your text file '%s'.", path.c_str()); + return GE_EXEC_MODEL_PATH_INVALID; } GELOGI("load modelData from file: %s.", path.c_str()); std::string key_path; @@ -710,12 +855,20 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel GetDomiOutputData(run_output_data, output_data); if ((run_input_data.dynamic_batch_size != 0) || (run_input_data.dynamic_image_width != 0) || - (run_input_data.dynamic_image_height != 0)) { + (run_input_data.dynamic_image_height != 0) || (run_input_data.dynamic_dims.size() != 0)) { std::vector> batch_info; - Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); + int32_t dynamic_type = static_cast(FIXED); + Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); if (ret != SUCCESS) { - GELOGE(FAILED, "Get dynamic input info failed."); - return FAILED; + GELOGE(ret, "Get dynamic input info failed."); + return ret; + } + if (dynamic_type == static_cast(DYNAMIC_DIMS)) { + ret = GraphExecutor::GetCombinedDynamicDims(model_id, batch_info); + if (ret != SUCCESS) { + GELOGE(FAILED, "Get dynamic input info failed."); + return FAILED; + } } if (!batch_info.empty()) { SetDynamicInputDataFlag(run_input_data, batch_info, input_data); @@ -790,6 +943,11 @@ Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelDa return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op); } +Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, + DynamicSingleOp **single_op) { + return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op); +} + Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector &inputs, std::vector &outputs) { if (executor == nullptr) { @@ -800,13 +958,21 @@ Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vectorExecuteAsync(inputs, outputs); } +ge::Status 
GeExecutor::ExecuteAsync(DynamicSingleOp *executor, const vector &input_desc, + const vector &inputs, vector &output_desc, + vector &outputs) { + GE_CHECK_NOTNULL(executor); + return executor->ExecuteAsync(input_desc, inputs, output_desc, outputs); +} + Status GeExecutor::ReleaseSingleOpResource(void *stream) { return SingleOpManager::GetInstance().ReleaseResource(stream); } Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) { std::vector> batch_info; - Status ret = GetDynamicBatchInfo(model_id, batch_info); + int32_t dynamic_type = static_cast(FIXED); + Status ret = GetDynamicBatchInfo(model_id, batch_info, dynamic_type); if (ret != SUCCESS) { GELOGE(ret, "Calc batch info size failed. ret = %d", ret); return ret; diff --git a/src/ge/executor/module.mk b/src/ge/executor/module.mk index 0eb87822..b19f3c24 100644 --- a/src/ge/executor/module.mk +++ b/src/ge/executor/module.mk @@ -26,6 +26,7 @@ local_ge_executor_src_files := \ ../graph/load/new_model_manager/data_inputer.cc \ ../graph/load/new_model_manager/data_dumper.cc \ ../graph/load/new_model_manager/zero_copy_task.cc \ + ../graph/load/new_model_manager/zero_copy_offset.cc \ ../graph/load/new_model_manager/task_info/task_info.cc \ ../graph/load/new_model_manager/task_info/event_record_task_info.cc \ ../graph/load/new_model_manager/task_info/event_wait_task_info.cc \ @@ -79,6 +80,7 @@ local_ge_executor_shared_library := \ libslog \ libmmpa \ libgraph \ + libregister \ libmsprof \ local_ge_executor_ldflags := -lrt -ldl \ @@ -128,6 +130,7 @@ LOCAL_SHARED_LIBRARIES := \ libslog \ libmmpa \ libgraph \ + libregister \ libmsprof \ LOCAL_LDFLAGS += $(local_ge_executor_ldflags) @@ -153,6 +156,7 @@ LOCAL_C_INCLUDES := $(local_ge_executor_c_include) LOCAL_STATIC_LIBRARIES := \ libge_common \ libgraph \ + libregister \ libprotobuf \ LOCAL_SHARED_LIBRARIES := \ @@ -184,6 +188,7 @@ LOCAL_C_INCLUDES := $(local_ge_executor_c_include) LOCAL_STATIC_LIBRARIES := \ libge_common \ libgraph \ + libregister \ 
libprotobuf \ LOCAL_SHARED_LIBRARIES := \ diff --git a/src/ge/ge_inference.mk b/src/ge/ge_inference.mk index f18f733a..42cc5e7d 100644 --- a/src/ge/ge_inference.mk +++ b/src/ge/ge_inference.mk @@ -70,6 +70,7 @@ OMG_HOST_SRC_FILES := \ graph/passes/resource_pair_remove_control_pass.cc \ graph/passes/pass_utils.cc \ graph/passes/base_pass.cc \ + graph/passes/bitcast_pass.cc \ graph/passes/constant_folding_pass.cc \ graph/passes/aicpu_constant_folding_pass.cc \ graph/passes/reshape_remove_pass.cc \ @@ -91,8 +92,10 @@ OMG_HOST_SRC_FILES := \ graph/passes/print_op_pass.cc \ graph/passes/no_use_reshape_remove_pass.cc \ graph/passes/iterator_op_pass.cc \ + graph/passes/input_output_connection_identify_pass.cc \ graph/passes/atomic_addr_clean_pass.cc \ graph/passes/mark_same_addr_pass.cc \ + graph/passes/mark_graph_unknown_status_pass.cc \ graph/common/omg_util.cc \ graph/common/bcast.cc \ graph/passes/dimension_compute_pass.cc \ @@ -107,6 +110,7 @@ OMG_HOST_SRC_FILES := \ graph/passes/isolated_op_remove_pass.cc \ graph/passes/permute_pass.cc \ graph/passes/ctrl_edge_transfer_pass.cc \ + graph/passes/end_of_sequence_add_control_pass.cc \ host_kernels/broadcast_gradient_args_kernel.cc \ host_kernels/greater_kernel.cc \ host_kernels/gather_v2_kernel.cc \ @@ -185,6 +189,8 @@ OMG_HOST_SRC_FILES := \ graph/passes/hccl_group_pass.cc \ graph/passes/switch_fusion_pass.cc \ graph/passes/switch_split_pass.cc \ + graph/passes/memcpy_addr_async_pass.cc \ + graph/passes/set_input_output_offset_pass.cc \ OMG_DEVICE_SRC_FILES := $(OMG_HOST_SRC_FILES) @@ -203,6 +209,7 @@ OME_HOST_SRC_FILES := \ graph/load/new_model_manager/tbe_handle_store.cc \ graph/load/new_model_manager/cpu_queue_schedule.cc \ graph/load/new_model_manager/zero_copy_task.cc \ + graph/load/new_model_manager/zero_copy_offset.cc \ graph/load/new_model_manager/data_dumper.cc \ graph/load/new_model_manager/task_info/task_info.cc \ graph/load/new_model_manager/task_info/event_record_task_info.cc \ diff --git 
a/src/ge/ge_local_engine/engine/host_cpu_engine.h b/src/ge/ge_local_engine/engine/host_cpu_engine.h index 1987138d..98e0748b 100644 --- a/src/ge/ge_local_engine/engine/host_cpu_engine.h +++ b/src/ge/ge_local_engine/engine/host_cpu_engine.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ #define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ diff --git a/src/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc b/src/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc index 0f33ae2a..badca5a3 100644 --- a/src/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc +++ b/src/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc @@ -61,5 +61,6 @@ REGISTER_OP_CREATOR(SwitchN, GeDeletedOp); REGISTER_OP_CREATOR(RefMerge, GeDeletedOp); REGISTER_OP_CREATOR(RefSwitch, GeDeletedOp); REGISTER_OP_CREATOR(TransShape, GeDeletedOp); +REGISTER_OP_CREATOR(Bitcast, GeDeletedOp); } // namespace ge_local } // namespace ge diff --git a/src/ge/ge_runner.mk b/src/ge/ge_runner.mk index fe19de02..bfc1b773 100644 --- a/src/ge/ge_runner.mk +++ b/src/ge/ge_runner.mk @@ -78,6 +78,7 @@ LIBGE_LOCAL_SRC_FILES := \ graph/load/new_model_manager/task_info/task_info.cc \ graph/load/new_model_manager/tbe_handle_store.cc \ graph/load/new_model_manager/zero_copy_task.cc \ + graph/load/new_model_manager/zero_copy_offset.cc \ graph/manager/graph_context.cc \ graph/manager/graph_manager.cc \ graph/manager/graph_manager_utils.cc \ @@ -98,10 +99,13 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/addn_pass.cc \ graph/passes/aicpu_constant_folding_pass.cc \ graph/passes/assert_pass.cc \ + 
graph/passes/input_output_connection_identify_pass.cc \ graph/passes/atomic_addr_clean_pass.cc \ graph/passes/mark_same_addr_pass.cc \ + graph/passes/mark_graph_unknown_status_pass.cc \ graph/partition/dynamic_shape_partition.cc \ graph/passes/base_pass.cc \ + graph/passes/bitcast_pass.cc \ graph/passes/cast_remove_pass.cc \ graph/passes/cast_translate_pass.cc \ graph/passes/common_subexpression_elimination_pass.cc \ @@ -214,6 +218,9 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/variable_prepare_op_pass.cc \ graph/passes/variable_ref_delete_op_pass.cc \ graph/passes/variable_ref_useless_control_out_delete_pass.cc \ + graph/passes/end_of_sequence_add_control_pass.cc \ + graph/passes/memcpy_addr_async_pass.cc \ + graph/passes/set_input_output_offset_pass.cc \ graph/preprocess/graph_preprocess.cc \ graph/preprocess/insert_op/ge_aipp_op.cc \ graph/preprocess/insert_op/util_insert_aipp_op.cc \ diff --git a/src/ge/generator/ge_generator.cc b/src/ge/generator/ge_generator.cc index 4869eb40..bc1e78c1 100644 --- a/src/ge/generator/ge_generator.cc +++ b/src/ge/generator/ge_generator.cc @@ -23,15 +23,15 @@ #include "common/util/error_manager/error_manager.h" #include "framework/common/debug/ge_log.h" #include "ge/ge_api.h" -#include "graph/ge_context.h" #include "graph/debug/ge_attr_define.h" +#include "graph/ge_context.h" #include "graph/manager/graph_manager.h" #include "graph/manager/util/rt_context_util.h" #include "graph/opsproto_manager.h" #include "graph/utils/graph_utils.h" #include "graph/utils/type_utils.h" -#include "model/ge_model.h" #include "init/gelib.h" +#include "model/ge_model.h" using std::map; using std::string; @@ -46,6 +46,16 @@ const char *const kFileNameSuffix = "online"; std::map engine_type_map{ {ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}}; + +bool ContainsDynamicInpus(const ge::OpDesc &op_desc) { + for (auto &tensor_desc : op_desc.GetAllInputsDescPtr()) { + if 
(tensor_desc->MutableShape().IsUnknownShape()) { + GELOGI("Contains unknown shape input. set is_dynamic_input to true."); + return true; + } + } + return false; +} } // namespace namespace ge { @@ -55,6 +65,7 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi GELOGI("CheckEngineType: use default engine."); return SUCCESS; } + // get op engine name string op_engine_name; auto iter = engine_type_map.find(engine_type); @@ -65,6 +76,12 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi GELOGE(FAILED, "CheckEngineType: engine type: %d not support", static_cast(engine_type)); return FAILED; } + + if (op_desc->HasAttr(ATTR_NAME_UNREGST_OPPATH)) { + op_desc->SetOpEngineName(op_engine_name); + op_desc->SetOpKernelLibName(op_engine_name); + return SUCCESS; + } // set op engine name and opkernelLib. when engine support std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { @@ -195,18 +212,19 @@ static void GetOpsProtoPath(string &opsproto_path) { class GeGenerator::Impl { public: - Status BuildModel(const Graph &graph, const vector &inputs, GraphId &graph_id, GeRootModelPtr &ge_models); + Status BuildModel(const Graph &graph, const vector &inputs, GeRootModelPtr &ge_models); Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model); Status SaveParams(GeModelPtr &ge_model, const string &type, const map &attrs, const vector &inputs, const vector &outputs); - Status GenerateInfershapeGraph(const Graph &graph, GraphId &graph_id); + Status GenerateInfershapeGraph(const Graph &graph); GraphManager graph_manager_; SaveParam save_param_; bool is_offline_ = true; + bool is_singleop_unregistered_ = false; private: static std::string Trim(const std::string &str); @@ -280,10 +298,9 @@ Status GeGenerator::GenerateOnlineModel(const Graph &graph, const vectorGenerateInfershapeGraph(graph, graph_id); + Status ret = 
impl_->GenerateInfershapeGraph(graph); if (ret != SUCCESS) { GELOGE(ret, "Dump infershape json failed"); if (impl_->graph_manager_.Finalize() != SUCCESS) { @@ -422,11 +439,11 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr } else { ge::RtContextUtil::GetInstance().SetNormalModeContext(ctx); } - GraphId graph_id; + GeRootModelPtr ge_root_model = nullptr; GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); impl_->is_offline_ = is_offline; - Status ret = impl_->BuildModel(graph, inputs, graph_id, ge_root_model); + Status ret = impl_->BuildModel(graph, inputs, ge_root_model); if (ret != SUCCESS) { GELOGE(ret, "Build model failed."); if (impl_->graph_manager_.Finalize() != SUCCESS) { @@ -478,6 +495,12 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in return PARAM_INVALID; } + domi::GetContext().is_dynamic_input = ContainsDynamicInpus(*op_desc); + + if (op_desc->HasAttr(ATTR_NAME_UNREGST_OPPATH)) { + impl_->is_singleop_unregistered_ = true; + } + // 0. Save original attributes. OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc); GE_CHECK_NOTNULL(op_desc_tmp); @@ -494,9 +517,6 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in // 2. Create ComputeGraph. string name = ge::CurrentTimeInStr() + "_" + model_file_name; ge::ComputeGraphPtr compute_graph = MakeShared(name); - if (compute_graph == nullptr) { - return INTERNAL_ERROR; - } GE_CHECK_NOTNULL_EXEC(compute_graph, return INTERNAL_ERROR); // 3. Add Node to ComputeGraph. 
@@ -529,16 +549,19 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in Graph graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph); GELOGI("ATC parser success in single op build."); - GraphId graph_id; GeRootModelPtr ge_root_model = nullptr; GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); impl_->is_offline_ = is_offline; - GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, graph_id, ge_root_model)); + GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model)); map op_attrs = op_desc_tmp->GetAllAttrs(); GE_CHECK_NOTNULL(ge_root_model); GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); map name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); - GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; + if (name_to_ge_model.empty()) { + GELOGE(PARAM_INVALID, "GetSubgraphInstanceNameToModel is empty."); + return PARAM_INVALID; + } + GeModelPtr &ge_model = name_to_ge_model.begin()->second; GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff)); @@ -608,7 +631,7 @@ Status GeGenerator::Impl::SaveModel(const string &file_name_prefix, GeModelPtr & return SUCCESS; } -Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector &inputs, GraphId &graph_id, +Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector &inputs, GeRootModelPtr &ge_root_model) { static GraphId id = 0; const std::map options; @@ -627,19 +650,22 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector return INTERNAL_ERROR; } uint64_t session_id = static_cast(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us - ret = graph_manager_.BuildGraph(id, inputs, ge_root_model, session_id); + if (is_singleop_unregistered_) { + ret = 
graph_manager_.BuildGraphForUnregisteredOp(id, inputs, ge_root_model, session_id); + } else { + ret = graph_manager_.BuildGraph(id, inputs, ge_root_model, session_id); + } + if (ret != SUCCESS) { GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph fail, graph id: %u", id); return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; } - - graph_id = id; id += 1; return SUCCESS; } -Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph, GraphId &graph_id) { +Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph) { static GraphId id = 0; const std::map options; Status ret = graph_manager_.AddGraph(id, graph, options); @@ -654,8 +680,6 @@ Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph, GraphId &g GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager generate graph failed"); return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; } - - graph_id = id; id += 1; return SUCCESS; diff --git a/src/ge/graph/build/graph_builder.cc b/src/ge/graph/build/graph_builder.cc index abcc253e..51519023 100644 --- a/src/ge/graph/build/graph_builder.cc +++ b/src/ge/graph/build/graph_builder.cc @@ -37,21 +37,6 @@ const int32_t kInvalidPerfLevel = -1; namespace ge { GraphBuilder::GraphBuilder() : build_mode_(BuildMode::GEN_TASK_WITH_FUSION), hcom_parallel_(false) {} -Status GraphBuilder::MarkGraph(ComputeGraphPtr &graph) { - GE_CHECK_NOTNULL(graph); - bool is_unknown_shape = false; - for (const auto &node : graph->GetDirectNode()) { - GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown_shape), - "Get node[%s] shape status failed!", node->GetName().c_str()); - if (is_unknown_shape) { - break; - } - } - graph->SetGraphUnknownFlag(is_unknown_shape); - GELOGD("mark graph [%s] unknown status success! 
value is %d", graph->GetName().c_str(), is_unknown_shape); - return SUCCESS; -} - void GraphBuilder::SetOptions(const ge::GraphManagerOptions &options) { stream_max_parallel_num_ = options.stream_max_parallel_num; hcom_parallel_ = options.hcom_parallel; @@ -277,14 +262,6 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, uint64_t session_id) { GELOGI("Start to build BuildForDynamicShape for dynamic shape."); - // mark unknown shape attr - for (auto &sub_graph : comp_graph->GetAllSubgraphs()) { - auto status = MarkGraph(sub_graph); - if (status != SUCCESS) { - GELOGE(FAILED, "mark graph failed!"); - return status; - } - } // Update Root Graph Data size for (auto &node : comp_graph->GetDirectNode()) { auto op_desc = node->GetOpDesc(); @@ -297,11 +274,22 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, } // for (auto &sub_graph : comp_graph->GetAllSubgraphs()) { + // exclude functional subgraph in known subgraph + if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { + continue; + } if (sub_graph->GetGraphUnknownFlag()) { // unknown shape build flow GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), "Build for unknown shape graph failed."); } else { + // reset functional subgraph parent graph as known subgraph + for (const auto &node : sub_graph->GetDirectNode()) { + for (const auto &sub_graph_name : node->GetOpDesc()->GetSubgraphInstanceNames()) { + auto sub_sub_graph = comp_graph->GetSubgraph(sub_graph_name); + GE_CHK_STATUS_RET(sub_graph->AddSubgraph(sub_sub_graph), "Failed add subgraph to known graph."); + } + } // known shape build flow GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id), "Build for known shape graph failed."); @@ -450,6 +438,11 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr 
&op_desc) GELOGI("Begin to calc dynamic shape graph data[%s] size.", op_desc->GetName().c_str()); // data op only has one output anchor ge::GeTensorDesc output_desc = op_desc->GetOutputDesc(0); + if (output_desc.MutableShape().IsUnknownShape()) { + GELOGI("No need to update dynamic shape graph data output size for unknown shape data."); + return SUCCESS; + } + int64_t output_size = 0; if (ge::TensorUtils::GetSize(output_desc, output_size) != SUCCESS) { GELOGW("Get size failed!"); diff --git a/src/ge/graph/build/graph_builder.h b/src/ge/graph/build/graph_builder.h index 2597aa2a..def3a28b 100644 --- a/src/ge/graph/build/graph_builder.h +++ b/src/ge/graph/build/graph_builder.h @@ -67,7 +67,6 @@ class GraphBuilder { GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); - Status MarkGraph(ComputeGraphPtr &graph); int build_mode_; std::map stream_max_parallel_num_; diff --git a/src/ge/graph/build/memory/block_mem_assigner.cc b/src/ge/graph/build/memory/block_mem_assigner.cc index 1910618d..1eed2b18 100644 --- a/src/ge/graph/build/memory/block_mem_assigner.cc +++ b/src/ge/graph/build/memory/block_mem_assigner.cc @@ -55,6 +55,13 @@ using std::unordered_map; using std::unordered_set; using std::vector; +void AlignMemOffset(size_t &mem_align_size) { + if (mem_align_size <= 0) { + return; + } + mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; +} + void MemoryBlock::SetHeadOffset(size_t offset) { head_offset_ = offset; size_t child_offset = head_offset_; @@ -92,7 +99,7 @@ void MemoryBlock::Resize() { } else { size_t block_size = (child_block_size > *iter) ? 
child_block_size : *iter; if ((block_size > 0) && (block_size % MEM_ALIGN_SIZE != 0)) { - block_size = (block_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; + AlignMemOffset(block_size); } block_size_ = block_size; if (last_continuous_block_) { @@ -101,6 +108,20 @@ void MemoryBlock::Resize() { } } +size_t MemoryBlock::AlignSize() const { + size_t align_block_size = 0; + auto iter = std::max_element(real_size_list_.begin(), real_size_list_.end()); + if (iter == real_size_list_.end()) { + GELOGW("real_size_list_ is empty"); + } else { + align_block_size = *iter; + if ((align_block_size > 0) && (align_block_size % MEM_ALIGN_SIZE != 0)) { + AlignMemOffset(align_block_size); + } + } + return align_block_size; +} + bool MemoryBlock::IsSameLabel(std::string &first_batch_label) { if (node_type_index_list_.empty()) { return false; @@ -133,31 +154,69 @@ bool MemoryBlock::IsSameLabel(std::string &first_batch_label) { } bool CanNotLifeReuse(MemoryBlock *block) { - if ((block == nullptr) || !block->reuse_mem_ || block->deleted_block_ || block->continuous_block_) { + if ((block == nullptr) || !block->reuse_mem_ || block->deleted_block_) { return true; } return false; } +void MemoryBlock::AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life) { + // continuous memory case:only real_size is maximum can be reused and only one continuous memory in one block + auto it_block = std::max_element(std::begin(block->NoAlignSizeList()), std::end(block->NoAlignSizeList())); + auto it_this = std::max_element(std::begin(NoAlignSizeList()), std::end(NoAlignSizeList())); + if (it_block != std::end(block->NoAlignSizeList()) && it_this != std::end(NoAlignSizeList())) { + if ((continuous_block_ && block->continuous_block_) || (continuous_block_ && (*it_this < *it_block)) || + (block->continuous_block_ && (*it_this > *it_block))) { + GELOGD("Conflict current block size:%zu continuous:%d, reuse block max size:%zu continuous:%d", *it_this, + 
continuous_block_, *it_block, block->continuous_block_); + return; + } + } + + MemoryBlock *parent = nullptr; + MemoryBlock *child = nullptr; + // merge small block to large block + if (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd()) { + if ((block->child_offset_ + AlignSize()) <= *it_block) { + parent = block; + child = this; + } + } + if ((parent != nullptr) && (child != nullptr) && child->child_blocks_.empty()) { + parent->child_blocks_.emplace_back(child); + parent->child_offset_ += child->AlignSize(); + child->deleted_block_ = true; + GELOGI( + "Add continuous block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" + " block[%p size:%zu, stream id:%ld, life time[begin:%zu, end:%zu]]", + child, child->block_size_, child->stream_id_, child->GetLifeBegin(), child->GetLifeEnd(), parent, + parent->block_size_, parent->stream_id_, parent->GetLifeBegin(), parent->GetLifeEnd()); + } +} + void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life) { if (CanNotLifeReuse(this) || CanNotLifeReuse(block)) { return; } + if (block->continuous_block_) { + AddContinuousLifeReuseBlock(block, total_node_depend_stream_life); + return; + } MemoryBlock *parent = nullptr; MemoryBlock *child = nullptr; // merge small block to large block if (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd()) { - if ((child_offset_ + block->block_size_) <= block_size_) { + if ((child_offset_ + block->AlignSize()) <= AlignSize()) { parent = this; child = block; - } else if ((block->child_offset_ + block_size_) <= block->block_size_) { + } else if ((block->child_offset_ + AlignSize()) <= block->AlignSize()) { parent = block; child = this; } } if ((parent != nullptr) && (child != nullptr) && child->child_blocks_.empty()) { parent->child_blocks_.emplace_back(child); - parent->child_offset_ += child->block_size_; + parent->child_offset_ += child->AlignSize(); child->deleted_block_ 
= true; GELOGI( "Add block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" @@ -431,7 +490,7 @@ size_t GetBlockSize(size_t size, const vector &ranges) { } GELOGW("Memory needed size:%zu is beyond the biggest block in memory ranges.", size); - return 0; + return size; } bool IsDirectOutputNode(const NodePtr &node, int idx) { @@ -465,34 +524,8 @@ void ReduceReusableBlockCount(const MemoryBlock &mem_block, map &reusable_block_counts, const MemoryBlock &reusable_block, - size_t block_size, size_t real_size, bool continuous, int64_t atomic_addr_clean_id) { + size_t block_size, size_t real_size, bool continuous) { bool can_reuse = false; - - // If node is before atomic_addr_clean node, the continus memory can't be reused. - if (!reusable_block.NodeTypeIndexList().empty()) { - auto node = reusable_block.NodeTypeIndexList()[0].node; - if (node != nullptr) { - auto op_desc = node->GetOpDesc(); - if (op_desc != nullptr) { - if ((op_desc->GetId() < atomic_addr_clean_id) && continuous) { - return false; - } - } - } - } - - // continuous memory case:only real_size is maximum can be reused and only one continuous memory in one block - if (continuous || reusable_block.continuous_block_) { - auto it = - std::max_element(std::begin(reusable_block.NoAlignSizeList()), std::end(reusable_block.NoAlignSizeList())); - if (it != std::end(reusable_block.NoAlignSizeList())) { - GE_IF_BOOL_EXEC((continuous && reusable_block.continuous_block_) || (continuous && (real_size < *it)) || - (reusable_block.continuous_block_ && (real_size > *it)), - GELOGD("Conflict current block size:%zu continuous:%d, reuse block max size:%zu continuous:%d", - real_size, continuous, *it, reusable_block.continuous_block_); - return false;); - } - } if (reusable_block.Size() == block_size) { can_reuse = true; } else { @@ -683,6 +716,34 @@ void BlockMemAssigner::PrintSymbolMap() { } } +bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) { + if (n == nullptr) { + GELOGE(FAILED, "Node is null."); + 
return false; + } + + // Get the continuous output type of the node, default is false + bool is_output_continuous = false; + auto node_desc = n->GetOpDesc(); + if (node_desc == nullptr) { + GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str()); + return false; + } + + // If GetBool fail, is_output_continuous is false. + (void)ge::AttrUtils::GetBool(node_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous); + if (is_output_continuous) { + if (n->GetOwnerComputeGraph() != nullptr) { + string graph_name = n->GetOwnerComputeGraph()->GetName(); + GELOGI("%s name[%s] set continuous, output size[%u].", graph_name.c_str(), n->GetName().c_str(), + n->GetAllOutDataAnchorsSize()); + return true; + } + } + + return false; +} + MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, MemoryType mem_type, const NodePtr &n, uint32_t out_index, const vector &workspace_reuse_flag, const bool is_op_reuse_mem, @@ -699,7 +760,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) && reuse_mem_flag && is_op_reuse_mem && (IsPreReuse(n, out_index)); auto stream_id = node_op_desc->GetStreamId(); - if (is_reuse_memory) { + if (is_reuse_memory && !continuous) { for (auto it = reusable_blocks_[stream_id].begin(); it != reusable_blocks_[stream_id].end(); ++it) { MemoryBlock *reusable_block = *it; if (!IsPostReuse(reusable_block)) { @@ -709,8 +770,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, } // A node can reuse blocks of the same stream and preorder streams - auto id = GetAtomicAddrCleanId(); - if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous, id)) { + if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) { reusable_block->AddNodeTypeIndex({n, mem_type, out_index, 
false}, real_size, no_align_size); if (mem_type == kOutput) { auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); @@ -750,6 +810,47 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, return block; } +MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector &ranges, + const bool is_op_reuse_mem) { + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); + auto node_op_desc = n->GetOpDesc(); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); + MemoryBlock *block = nullptr; + int64_t total_size = 0; + for (uint32_t index = 0; index < static_cast(node_op_desc->GetOutputsSize()); index++) { + auto output_op_desc = node_op_desc->GetOutputDescPtr(index); + if (output_op_desc == nullptr) { + return nullptr; + } + int64_t size = 0; + if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { + GELOGI("Get size failed"); + return nullptr; + } + size_t align_size = static_cast(size); + AlignMemOffset(align_size); + total_size += align_size; + + // only apply total size in first block + if (index != 0) { + zero_memory_list_.emplace_back(n, kOutput, index); + } + } + + auto block_size = GetBlockSize(total_size, ranges); + GELOGI("Node[%s] continuous out memory size[%ld] block size[%zu]", node_op_desc->GetName().c_str(), total_size, + block_size); + + vector workspace_reuse_flag; + block = ApplyMemory(block_size, total_size, total_size, kOutput, n, 0, workspace_reuse_flag, is_op_reuse_mem, true); + if (block != nullptr) { + // hccl task need align header and tail + block->first_continuous_block_ = true; + block->last_continuous_block_ = true; + } + return block; +} + MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector &ranges, const bool is_op_reuse_mem, const bool continuous) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); @@ 
-991,6 +1092,10 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector // Allocate memory for the current node and release node memory of the same size in the workspace GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_[stream_id]);) + if (IsContinuousOutput(node)) { + (void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); + return SUCCESS; + } for (uint32_t i = 0; i < static_cast(op_desc->GetOutputsSize()); i++) { int64_t size = 0; auto output_op_desc = op_desc->GetOutputDescPtr(i); @@ -1017,7 +1122,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector continue; } // atomic can't be reused - if (is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic) { + bool need_change = is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic; + if (need_change) { is_op_reuse_mem_ = false; } MemoryBlock *mem_block = ApplyOutMemory(node, i, ranges, is_op_reuse_mem_, out_node_set_continuous_input); @@ -1225,10 +1331,12 @@ static bool CompareBlockIndex(MemoryBlock *left, MemoryBlock *right) { /// @param [in] input blocks need continuous /// @param [out] blocks after continuous order /// @param [in/out] blocks ordered +/// @param [in] input or output /// void ReAssignContinuousBlocks(const std::vector &org_blocks, const std::map block_map, - std::vector &dest_blocks, std::vector &continuous_blocks) { + std::vector &dest_blocks, std::vector &continuous_blocks, + const std::string &type) { for (auto &memory_block : org_blocks) { if (memory_block == nullptr || memory_block->deleted_block_) { continue; @@ -1245,7 +1353,7 @@ void ReAssignContinuousBlocks(const std::vector &org_blocks, for (auto &memory_block : continuous_blocks) { GE_IF_BOOL_EXEC(memory_block == nullptr, continue); - GELOGI("Block continuous input index:%d", memory_block->input_index_); + GELOGI("Block continuous %s index:%d", type.c_str(), memory_block->input_index_); 
count++; if (count == 1) { memory_block->first_continuous_block_ = true; @@ -1280,7 +1388,7 @@ void BlockMemAssigner::AssignContinuousBlocks() { continuous_block_map.size(), continuous_blocks.size()); continue; } - ReAssignContinuousBlocks(memory_blocks_, continuous_block_map, dest_memory_blocks, continuous_blocks); + ReAssignContinuousBlocks(memory_blocks_, continuous_block_map, dest_memory_blocks, continuous_blocks, "input"); memory_blocks_.swap(dest_memory_blocks); } } @@ -1292,14 +1400,25 @@ void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) { } for (size_t i = 0; i < memory_blocks_.size(); ++i) { auto parent = memory_blocks_[i]; - if (parent == nullptr || parent->deleted_block_) { + if (parent == nullptr || parent->deleted_block_ || parent->continuous_block_) { continue; } if (parent->reuse_mem_ && !IsPostReuse(parent)) { parent->reuse_mem_ = false; } for (size_t j = i + 1; j < memory_blocks_.size(); ++j) { - parent->AddLifeReuseBlock(memory_blocks_[j], total_node_depend_stream_life_); + auto child = memory_blocks_[j]; + if (child == nullptr) { + continue; + } + // If node is before atomic_addr_clean node, the continus memory can't be reused. 
+ if (!parent->NodeTypeIndexList().empty() && child->continuous_block_) { + auto node = parent->NodeTypeIndexList()[0].node; + if (node == nullptr || node->GetOpDesc() == nullptr || (node->GetOpDesc()->GetId() < GetAtomicAddrCleanId())) { + continue; + } + } + parent->AddLifeReuseBlock(child, total_node_depend_stream_life_); } } } @@ -1450,8 +1569,8 @@ Status BlockMemAssigner::Assign() { bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || - (node_type == HCOMBROADCAST) || (node_type == HCOMALLREDUCE) || (node_type == CONSTANTOP) || - (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || - (node_type == HVDCALLBACKBROADCAST) || (node_type == HVDCALLBACKALLREDUCE); + (node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || + (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || + (node_type == HVDCALLBACKBROADCAST); } } // namespace ge diff --git a/src/ge/graph/build/memory/block_mem_assigner.h b/src/ge/graph/build/memory/block_mem_assigner.h index 4e9c3b05..3dfba4c5 100644 --- a/src/ge/graph/build/memory/block_mem_assigner.h +++ b/src/ge/graph/build/memory/block_mem_assigner.h @@ -90,6 +90,8 @@ class MemoryBlock { } size_t Size() const { return block_size_; } + size_t AlignSize() const; + void SetHeadOffset(size_t offset); void SetTailOffset(size_t offset); @@ -118,6 +120,8 @@ class MemoryBlock { bool IsSameLabel(std::string &first_batch_label); + void AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life); + void AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &node_depend_stream_life); void SetLifeTimeEnd(size_t time); @@ -362,6 +366,10 @@ class BlockMemAssigner : public MemAssigner { /// void ReuseBlocksByLifeTime(size_t range_size); + bool IsContinuousOutput(const NodePtr &n); + + 
MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector &ranges, const bool is_op_reuse_mem); + std::unordered_map> reusable_blocks_; std::map reusable_block_counts_; diff --git a/src/ge/graph/build/memory/graph_mem_assigner.cc b/src/ge/graph/build/memory/graph_mem_assigner.cc index 8393c474..5ad49f75 100644 --- a/src/ge/graph/build/memory/graph_mem_assigner.cc +++ b/src/ge/graph/build/memory/graph_mem_assigner.cc @@ -293,7 +293,8 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { } else if (is_loop_graph) { GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, mem_clean_start)); } else { - GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, mem_clean_start, mem_clean_size), "SetAtomicCleanAttr failed."); + GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {mem_clean_start}, {mem_clean_size}), + "SetAtomicCleanAttr failed."); } } } @@ -441,35 +442,33 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); vector output_list = out_op_desc->GetOutputOffset(); - if (out_op_desc->GetOutputsSize() > output_list.size()) { + if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", out_op_desc->GetOutputsSize(), output_list.size()); return ge::FAILED; } - memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE; + size_t mem_offset = output_list[0]; for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { - output_list[out_data_anchor->GetIdx()] = memory_offset_[0].mem_offset_; - size_t pre_mem_offset = memory_offset_[0].mem_offset_; - + output_list[out_data_anchor->GetIdx()] = mem_offset; int64_t tensor_desc_size = 0; if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) != ge::SUCCESS) { GELOGE(FAILED, "GetSize failed."); return FAILED; } - 
memory_offset_[0].mem_offset_ += tensor_desc_size; - - AlignMemOffset(MEM_ALIGN_SIZE); + mem_offset += tensor_desc_size; + if (mem_offset <= 0) { + return FAILED; + } + mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; GELOGI( - "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " + "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " "real_size[%ld].", node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), - pre_mem_offset, out_op_desc->GetStreamId(), (memory_offset_[0].mem_offset_ - pre_mem_offset), tensor_desc_size); + output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size); } - out_op_desc->SetOutputOffset(output_list); - memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE; return ge::SUCCESS; } @@ -809,14 +808,12 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map(memory_offset_[0].mem_offset_); GELOGI("Begin to reAssign atomic memory, atomic initial address mem_offset = %zu!", memory_offset_[0].mem_offset_); + vector connect_netoutput_nodes; for (auto &node : compute_graph_->GetAllNodes()) { auto node_op_desc = node->GetOpDesc(); if (node_op_desc == nullptr) { @@ -839,36 +836,20 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { return ge::PARAM_INVALID; } - // Atomic op memory start addr of loop graph - int64_t loop_graph_atomic_mem_start = static_cast(memory_offset_[0].mem_offset_); - - // Reassign atomic node output memory - Status ret = AssignAtomicOutputMemory(node); - if (ret != SUCCESS) { - GELOGE(ret, "Assign atomic output memory failed, node is %s.", node_op_desc->GetName().c_str()); - return ret; + vector is_connect_netoutput; + // If GetBool fail, attr is_connect_netoutput is an empty vector. 
+ (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput); + if (!is_connect_netoutput.empty()) { + connect_netoutput_nodes.emplace_back(node); + continue; } - // Check atomic workspace - map> sub_node_workspace_info; - sub_node_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, sub_node_workspace_info); - if (!sub_node_workspace_info.empty()) { - bool is_fusion_node = false; - // If GetBool fail, is_fusion_node is false. - (void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node); - - if (is_fusion_node) { - // Assign fusion atomic node workspace memory - ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, sub_node_workspace_info); - } else { - // Assign single ordinary atomic node workspace memory, not include fusion node - ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, sub_node_workspace_info); - } - - if (ret != SUCCESS) { - GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str()); - return ret; - } + // Atomic op memory start addr of loop graph + int64_t loop_graph_atomic_mem_start = static_cast(memory_offset_[0].mem_offset_); + vector mem_offset_end; + if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) { + GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str()); + return FAILED; } /// In networks with loop op, atomic op uses atomic_addr_clean op independently, @@ -883,13 +864,80 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { // Set the address attr of atomic clean operator int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; if (atomic_mem_size != 0) { - GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, atomic_mem_start, atomic_mem_size), "SetAtomicCleanAttr failed."); + GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {atomic_mem_start}, {atomic_mem_size}), + "SetAtomicCleanAttr failed."); + } + 
} + + if (AssignConnectNetOutputAtomicMemory(connect_netoutput_nodes) != SUCCESS) { + GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput."); + return FAILED; + } + + return SUCCESS; +} + +Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node, + vector &mem_offset_end) { + auto node_op_desc = node->GetOpDesc(); + // Assign atomic node output memory + Status ret = AssignAtomicOutputMemory(node, mem_offset_end); + if (ret != SUCCESS) { + GELOGE(ret, "Failed to assign atomic output memory, node is %s.", node_op_desc->GetName().c_str()); + return ret; + } + + // Check and assign atomic node workspace memory + map> atomic_workspace_info; + atomic_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_info); + if (!atomic_workspace_info.empty()) { + bool is_fusion_node = false; + // If GetBool fail, is_fusion_node is false. + (void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node); + + if (is_fusion_node) { + // Assign fusion atomic node workspace memory + ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end); + } else { + // Assign single ordinary atomic node workspace memory, not include fusion node + ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end); + } + if (ret != SUCCESS) { + GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str()); + return ret; } } return SUCCESS; } +Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector &connect_netoutput_nodes) { + for (auto &node : connect_netoutput_nodes) { + GE_CHECK_NOTNULL(node); + if (node->GetOpDesc() == nullptr) { + GELOGW("Current node %s op desc is nullptr, memory assignment is skipped.", node->GetName().c_str()); + continue; + } + + // Atomic memory start addr + int64_t original_atomic_mem_start = static_cast(memory_offset_[0].mem_offset_); + 
GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.", + node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start); + vector mem_offset_end; + if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) { + GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str()); + return FAILED; + } + + // All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately. + if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end) != SUCCESS) { + GELOGE(FAILED, "Failed to set atomic attr separately."); + return FAILED; + } + } + return SUCCESS; +} + Status GraphMemoryAssigner::AssignReferenceMemory() { for (auto &node : compute_graph_->GetDirectNode()) { // Get the reference type of the node, default is false @@ -971,9 +1019,10 @@ bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) { return true; } -Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) { +Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, vector &mem_offset_end) { auto op_desc = node->GetOpDesc(); GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED); + mem_offset_end.clear(); GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str()); vector atomic_output_index; @@ -996,24 +1045,9 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) { // If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here bool is_assigned_mem = false; - if (static_cast(output_index) >= node->GetAllOutDataAnchors().size()) { - GELOGE(ge::PARAM_INVALID, "Output index %ld is more than the size of node's AllOutDataAnchors.", output_index); - return ge::PARAM_INVALID; - } - auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index); - 
GE_CHECK_NOTNULL(out_data_anchor); - auto input_anchors = out_data_anchor->GetPeerInDataAnchors(); - for (auto &input_anchor : input_anchors) { - auto output_node = input_anchor->GetOwnerNode(); - - /// Get input atomic attr of peer output op, if atomic_input_index[0] = -1, indicates that the atomic address - /// has been assigned - vector atomic_input_index; - (void)ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index); - if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) { - is_assigned_mem = true; - break; - } + if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) { + GELOGE(ge::FAILED, "Failed to get memory assignment of node %s.", node->GetName().c_str()); + return ge::FAILED; } // If you have already assigned an atomic address, skip it, and you don't need to reassign it. @@ -1038,6 +1072,7 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) { memory_offset_[0].mem_offset_ += size; AlignMemOffset(MEM_ALIGN_SIZE); + mem_offset_end.emplace_back(memory_offset_[0].mem_offset_); } op_desc->SetOutputOffset(output_list); @@ -1045,8 +1080,33 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) { return ge::SUCCESS; } +Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index, + bool &is_mem_assigned) { + if (static_cast(output_index) >= node->GetAllOutDataAnchors().size()) { + GELOGE(ge::PARAM_INVALID, "Output index %ld is more than the size of node's AllOutDataAnchors.", output_index); + return ge::PARAM_INVALID; + } + auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index); + GE_CHECK_NOTNULL(out_data_anchor); + auto input_anchors = out_data_anchor->GetPeerInDataAnchors(); + for (auto &input_anchor : input_anchors) { + auto output_node = input_anchor->GetOwnerNode(); + + /// Get input atomic attr of peer output op, if atomic_input_index[0] = -1, indicates 
that the atomic address + /// has been assigned + vector atomic_input_index; + (void)ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index); + if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) { + is_mem_assigned = true; + break; + } + } + return SUCCESS; +} + Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc, - map> &workspace_info) { + map> &workspace_info, + vector &mem_offset_end) { GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str()); vector workspace_vector = op_desc->GetWorkspace(); @@ -1078,6 +1138,7 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc op_desc->GetStreamId(), workspace_size, workspace_size); memory_offset_[0].mem_offset_ += workspace_size; + mem_offset_end.emplace_back(memory_offset_[0].mem_offset_); } } op_desc->SetWorkspace(workspace_vector); @@ -1086,7 +1147,8 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc } Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc, - map> &workspace_info) { + map> &workspace_info, + vector &mem_offset_end) { GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str()); map> sub_node_workspace_offset; @@ -1108,6 +1170,7 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt op_desc->GetStreamId(), workspace_size, workspace_size); memory_offset_[0].mem_offset_ += workspace_size; + mem_offset_end.emplace_back(memory_offset_[0].mem_offset_); index_offset.insert(std::make_pair(workspace_index, workspace_offset)); } sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset)); @@ -1287,6 +1350,47 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const { return SUCCESS; } +Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t 
atomic_mem_start, + const vector &mem_offset_end) { + GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start); + + // Parsing offset and size vectors + vector memory_offset_start; + vector memory_offset_size; + memory_offset_start.emplace_back(atomic_mem_start); + for (size_t i = 0; i < mem_offset_end.size(); ++i) { + memory_offset_start.emplace_back(mem_offset_end[i]); + // Number 1 means element index + auto size = memory_offset_start[i + 1] - memory_offset_start[i]; + memory_offset_size.emplace_back(size); + } + memory_offset_start.pop_back(); + + const auto &in_control_anchor = node->GetInControlAnchor(); + if (!memory_offset_size.empty() && in_control_anchor != nullptr) { + for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { + if (peer_out_control_anchor == nullptr) { + continue; + } + auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); + auto peer_out_node_desc = peer_out_node->GetOpDesc(); + if (peer_out_node_desc == nullptr) { + continue; + } + + GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.", memory_offset_size.size(), + peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str()); + if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { + if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size) != SUCCESS) { + GELOGE(FAILED, "Set atomic clean attr failed."); + return FAILED; + } + } + } + } + return SUCCESS; +} + Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start) { // set the address attr of atomic clean operator for loop graph int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; @@ -1308,7 +1412,7 @@ Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int6 peer_out_node_desc->GetType().c_str()); if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { - 
GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(peer_out_node, atomic_mem_start, atomic_mem_size), + GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(peer_out_node, {atomic_mem_start}, {atomic_mem_size}), GELOGE(FAILED, "SetAtomicCleanAttr failed."); return FAILED); } @@ -1317,8 +1421,8 @@ Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int6 return SUCCESS; } -ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, int64_t atomic_mem_start, - int64_t atomic_mem_size) { +ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, const vector &atomic_mem_start, + const vector &atomic_mem_size) { for (ge::NodePtr &node : compute_graph_->GetAllNodes()) { auto node_op_desc = node->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); @@ -1327,15 +1431,15 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, int64_t ato ((n == nullptr) && (node_op_desc->GetType() == ATOMICADDRCLEAN))) { vector workspace_vector = node_op_desc->GetWorkspace(); vector workspace_byte_vector = node_op_desc->GetWorkspaceBytes(); - workspace_vector.emplace_back(atomic_mem_start); - workspace_byte_vector.emplace_back(atomic_mem_size); + workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); + workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); node_op_desc->SetWorkspace(workspace_vector); node_op_desc->SetWorkspaceBytes(workspace_byte_vector); std::vector mem_start_vector; // If GetListInt fail, mem_start_vector is empty. 
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); - mem_start_vector.emplace_back(atomic_mem_start); + mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), GELOGE(FAILED, "SetListInt failed."); return FAILED); @@ -1343,16 +1447,26 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, int64_t ato std::vector mem_size_vector; // If GetListInt fail, mem_size_vector is empty. (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); - mem_size_vector.emplace_back(atomic_mem_size); + mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), GELOGE(FAILED, "SetListInt failed."); return FAILED); - GELOGI( - "[IMAS]SetAtomicCleanAttr : Set %s name[%s] output[%d] offset to [%ld] streamid[%ld] size[%ld] " - "realsize[%ld].", - node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), 0, atomic_mem_start, - node->GetOpDesc()->GetStreamId(), atomic_mem_size, atomic_mem_size); + std::stringstream ss; + for (auto iter : atomic_mem_start) { + ss << iter << " "; + } + string atomic_mem_start_str = ss.str(); + ss.clear(); + ss.str(""); + for (auto iter : atomic_mem_size) { + ss << iter << " "; + } + string atomic_mem_size_str = ss.str(); + + GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]", + node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), + atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId()); } } return SUCCESS; diff --git a/src/ge/graph/build/memory/graph_mem_assigner.h b/src/ge/graph/build/memory/graph_mem_assigner.h index 
67008918..afe9a4fa 100644 --- a/src/ge/graph/build/memory/graph_mem_assigner.h +++ b/src/ge/graph/build/memory/graph_mem_assigner.h @@ -147,22 +147,33 @@ class GraphMemoryAssigner { /// bool CheckInputIsSupportAtomic(const ge::NodePtr &node); - ge::Status AssignAtomicOutputMemory(const ge::NodePtr &node); + ge::Status GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index, bool &is_mem_assigned); + + ge::Status AssignAtomicOutputMemory(const ge::NodePtr &node, std::vector &mem_offset_end); ge::Status AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc, - std::map> &workspace_info); + std::map> &workspace_info, + std::vector &mem_offset_end); ge::Status AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc, - std::map> &workspace_info); + std::map> &workspace_info, + std::vector &mem_offset_end); + + ge::Status AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node, std::vector &mem_offset_end); + ge::Status AssignConnectNetOutputAtomicMemory(vector &connect_netoutput_nodes); + + ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, + const std::vector &mem_offset_end); /// /// @brief set loop graph atomic attr - /// @param node + /// @param node, atomic memory assignment start offset /// @param atomic_mem_start: atomic op memory start address /// ge::Status SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start); - ge::Status SetAtomicCleanAttr(const ge::NodePtr &n, int64_t atomic_mem_start, int64_t atomic_mem_size); + ge::Status SetAtomicCleanAttr(const ge::NodePtr &n, const std::vector &atomic_mem_start, + const std::vector &atomic_mem_size); void AlignMemOffset(const int64_t &mem_align_size); diff --git a/src/ge/graph/build/model_builder.cc b/src/ge/graph/build/model_builder.cc index a765d8e7..853b09a6 100644 --- a/src/ge/graph/build/model_builder.cc +++ b/src/ge/graph/build/model_builder.cc @@ -42,10 +42,12 @@ #include "graph/utils/op_desc_utils.h" #include 
"graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" +#include "graph/passes/memcpy_addr_async_pass.h" #include "init/gelib.h" #include "memory/memory_assigner.h" #include "omg/version.h" #include "register/op_registry.h" +#include "graph/passes/set_input_output_offset_pass.h" using std::map; using std::set; @@ -668,12 +670,36 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) { GE_CHK_STATUS_RET(label_allocator.AssignFunctionalLabels(label_num_), "Assign label failed."); GE_TIMESTAMP_END(AssignFunctionalLabels, "ModelBuilder::AssignFunctionalLabels"); + // Add memcpy_addr_async node. + rtFeatureType_t feature_type = FEATURE_TYPE_MEMCPY; + int32_t feature_info = MEMCPY_INFO_SUPPORT_ZEROCOPY; + int64_t value = 0; + rtError_t rt_ret = rtGetRtCapability(feature_type, feature_info, &value); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "rtGetRtCapability failed."); + return RT_FAILED; + } else { + if (value == RT_CAPABILITY_SUPPORT) { + GE_TIMESTAMP_START(AddMemcpyAddrAsyncNode); + MemcpyAddrAsyncPass memcpy_addr; + GE_CHK_STATUS_RET(memcpy_addr.Run(compute_graph_), "Add memcpy_addr_async node failed."); + GE_TIMESTAMP_END(AddMemcpyAddrAsyncNode, "MemcpyAddrAsyncPass::Run."); + } else { + GELOGW("rtGetRtCapability not support memcpy_addr_async."); + } + } + GE_TIMESTAMP_START(AssignMemory); MemoryAssigner mem_assigner(compute_graph_); GE_CHK_STATUS_RET(mem_assigner.AssignMemory(is_loop_graph_, mem_offset_, zero_copy_mem_size_), "Assign Memory Failed!"); GE_TIMESTAMP_END(AssignMemory, "GraphBuilder::AssignMemory"); + GE_TIMESTAMP_START(SetInputOutputOffset); + SetInputOutputOffsetPass input_output_offset; + GE_CHK_STATUS_RET(input_output_offset.Run(compute_graph_), "Set input output offset failed."); + GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run."); + // Compile single op in graph build stage GE_TIMESTAMP_START(CompileSingleOp); GE_CHK_STATUS_RET(CompileSingleOp(), "ATC builder CompileSingleOp() return fail."); 
diff --git a/src/ge/graph/build/stream_allocator.cc b/src/ge/graph/build/stream_allocator.cc index d49bb61b..5c82f461 100644 --- a/src/ge/graph/build/stream_allocator.cc +++ b/src/ge/graph/build/stream_allocator.cc @@ -612,6 +612,33 @@ bool StreamAllocator::IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr AttrUtils::HasAttr(activate_stream_node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE)) { return false; } + + /// + /// stream_0 --> stream_2 --> stream_3 --> stream_4 + /// /\ | + /// | \/ + /// | stream_1 --> stream_5 --> stream_6 --> stream_7 + /// | /\ | | + /// | | \/ | + /// | |---------- stream_8 | + /// | | + /// |-----------------------------------------------------------| + /// + /// Exit1(S7) Exit2(S7) Exit3(S7) + /// \ / | + /// AddN(S1) NextIteration(S7) + /// | | + /// NextIteration(S1) / + /// | / + /// | / + /// StreamActive(S7) + /// + /// Event between Exit1/Exit2 and AddN should not be optimized + /// + if (IsActiveAfterNextIteration(activate_stream_node)) { + continue; + } + visited_nodes.insert(activate_stream_node); // nodes in stream link to streamActivate no need to add event before activated node for (const auto &pre_activate_stream_node : activate_stream_node->GetInNodes()) { @@ -639,6 +666,18 @@ bool StreamAllocator::IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr return false; } +bool StreamAllocator::IsActiveAfterNextIteration(const NodePtr &active_node_ptr) const { + if ((active_node_ptr == nullptr) || active_node_ptr->GetInControlNodes().empty()) { + return false; + } + for (const auto &in_node : active_node_ptr->GetInControlNodes()) { + if ((in_node->GetType() != NEXTITERATION) && (in_node->GetType() != REFNEXTITERATION)) { + return false; + } + } + return true; +} + // Split the stream according to the maximum number of nodes in the stream. 
Status StreamAllocator::SplitStreams(vector> &split_streams) { if (enable_single_stream_ || stream_num_ == 0) { diff --git a/src/ge/graph/build/stream_allocator.h b/src/ge/graph/build/stream_allocator.h index a201a138..a5326a39 100644 --- a/src/ge/graph/build/stream_allocator.h +++ b/src/ge/graph/build/stream_allocator.h @@ -55,6 +55,7 @@ class StreamAllocator { Status OptimizeByStreamActivate(); // Determine if the successor node of RecvNode is directly or indirectly activated by the SendNode precursor node bool IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr, const NodePtr &recv_node_ptr) const; + bool IsActiveAfterNextIteration(const NodePtr &active_node_ptr) const; Status SplitStreams(std::vector> &split_streams); bool NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, const OpDescPtr &op_desc) const; diff --git a/src/ge/graph/execute/graph_execute.cc b/src/ge/graph/execute/graph_execute.cc index 5ff89c07..1bebd382 100644 --- a/src/ge/graph/execute/graph_execute.cc +++ b/src/ge/graph/execute/graph_execute.cc @@ -86,10 +86,10 @@ Status GraphExecutor::SetGraphContext(GraphContextPtr graph_context_ptr) { return SUCCESS; } -Status GraphExecutor::SetDynamicSize(uint32_t model_id, const std::vector &batch_num) { +Status GraphExecutor::SetDynamicSize(uint32_t model_id, const std::vector &batch_num, int32_t dynamic_type) { auto model_manager = ge::ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); - Status ret = model_manager->SetDynamicSize(model_id, batch_num); + Status ret = model_manager->SetDynamicSize(model_id, batch_num, dynamic_type); if (ret != SUCCESS) { GELOGE(FAILED, "SetDynamicSize failed"); return ret; @@ -486,12 +486,14 @@ Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vector> &batch_info) { +Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector> &batch_info, + int32_t &dynamic_type) { auto model_manager = ge::ModelManager::GetInstance(); 
GE_CHECK_NOTNULL(model_manager); - Status ret = model_manager->GetDynamicBatchInfo(model_id, batch_info); + Status ret = model_manager->GetDynamicBatchInfo(model_id, batch_info, dynamic_type); if (ret != SUCCESS) { GELOGE(ret, "GetDynamicBatchInfo failed."); return ret; @@ -499,12 +501,30 @@ Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector &batch_info) { +/// +/// @ingroup ge +/// @brief Get combined dynamic dims info +/// @param [in] model_id +/// @param [out] batch_info +/// @return execute result +/// +Status GraphExecutor::GetCombinedDynamicDims(uint32_t model_id, std::vector> &batch_info) { + auto model_manager = ge::ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + Status ret = model_manager->GetCombinedDynamicDims(model_id, batch_info); + if (ret != SUCCESS) { + GELOGE(ret, "GetCombinedDynamicDims failed."); + return ret; + } + return SUCCESS; +} + +Status GraphExecutor::GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type) { auto model_manager = ge::ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); - Status ret = model_manager->GetCurShape(model_id, batch_info); + Status ret = model_manager->GetCurShape(model_id, batch_info, dynamic_type); if (ret != SUCCESS) { - GELOGE(FAILED, "GetCurShape failed"); + GELOGE(ret, "GetCurShape failed"); return ret; } return SUCCESS; diff --git a/src/ge/graph/execute/graph_execute.h b/src/ge/graph/execute/graph_execute.h index 6919a439..f79a2e29 100644 --- a/src/ge/graph/execute/graph_execute.h +++ b/src/ge/graph/execute/graph_execute.h @@ -56,7 +56,7 @@ class GraphExecutor { Status SetGraphContext(GraphContextPtr graph_context_ptr); - static Status SetDynamicSize(uint32_t model_id, const std::vector &batch_num); + static Status SetDynamicSize(uint32_t model_id, const std::vector &batch_num, int32_t dynamic_type); void SetTrainFlag(bool is_train_graph); @@ -80,11 +80,22 @@ class GraphExecutor { /// @brief Get dynamic batch_info /// @param [in] 
model_id /// @param [out] batch_info + /// @param [out] dynamic_type /// @return execute result /// - static Status GetDynamicBatchInfo(uint32_t model_id, std::vector> &batch_info); + static Status GetDynamicBatchInfo(uint32_t model_id, std::vector> &batch_info, + int32_t &dynamic_type); - static Status GetCurShape(const uint32_t model_id, std::vector &batch_info); + /// + /// @ingroup ge + /// @brief Get combined dynamic dims info + /// @param [in] model_id + /// @param [out] batch_info + /// @return execute result + /// + static Status GetCombinedDynamicDims(uint32_t model_id, std::vector> &batch_info); + + static Status GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type); static Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); diff --git a/src/ge/graph/label/while_label_maker.cc b/src/ge/graph/label/while_label_maker.cc index 6601abd1..c5e0abb7 100644 --- a/src/ge/graph/label/while_label_maker.cc +++ b/src/ge/graph/label/while_label_maker.cc @@ -104,12 +104,11 @@ Status WhileOpLabelMaker::Run(uint32_t &label_index) { GE_CHECK_NOTNULL(cond_out_desc); GeTensorDesc pred_desc = cond_out_desc->GetInputDesc(kCondOutputIndex); - GeTensorDesc cond_desc(GeShape(pred_desc.GetShape().GetDims()), pred_desc.GetFormat(), DT_INT32); // false ==> 0 ==> switch_labels[0] ==> body_leave_index // true ==> 1 ==> switch_labels[1] ==> body_enter_name const std::vector switch_labels = {body_leave_index, body_enter_index}; - NodePtr switch_node = AddLabelSwitchLeave(cond_graph, cond_leave_name, cond_desc, switch_labels); + NodePtr switch_node = AddLabelSwitchLeave(cond_graph, cond_leave_name, pred_desc, switch_labels); if (switch_node == nullptr) { GELOGE(INTERNAL_ERROR, "Subgraph: %s add label switch failed.", cond_graph->GetName().c_str()); return FAILED; diff --git a/src/ge/graph/load/graph_loader.cc b/src/ge/graph/load/graph_loader.cc index 4a986308..d181f3a5 100644 --- a/src/ge/graph/load/graph_loader.cc +++ 
b/src/ge/graph/load/graph_loader.cc @@ -36,20 +36,20 @@ GraphLoader::~GraphLoader() = default; Status GraphLoader::UnloadModel(uint32_t model_id) { auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); - GELOGI("UnLoad model begin, model_id:%u.", model_id); + GELOGI("UnLoad model begin, model id:%u.", model_id); Status ret = model_manager->Stop(model_id); if (ret != SUCCESS) { - GELOGE(ret, "UnloadModel: Stop failed."); + GELOGE(ret, "UnloadModel: Stop failed. model id:%u", model_id); } ret = model_manager->Unload(model_id); if (ret != SUCCESS) { - GELOGE(ret, "UnloadModel: Unload failed."); + GELOGE(ret, "UnloadModel: Unload failed. model id:%u", model_id); CsaInteract::GetInstance().WriteErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_UNLOAD); return ret; } - GELOGI("UnLoad model success, model_id:%u.", model_id); + GELOGI("UnLoad model success, model id:%u.", model_id); return SUCCESS; } @@ -123,14 +123,14 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string Status ret; try { if (!CheckInputPathValid(path)) { - GELOGE(PARAM_INVALID, "model path is invalid: %s", path.c_str()); - return PARAM_INVALID; + GELOGE(GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str()); + return GE_EXEC_MODEL_PATH_INVALID; } GELOGI("Load model begin, model path is: %s", path.c_str()); if (!key_path.empty() && !CheckInputPathValid(key_path)) { - GELOGE(PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str()); - return PARAM_INVALID; + GELOGE(GE_EXEC_MODEL_KEY_PATH_INVALID, "decrypt_key path is invalid: %s", key_path.c_str()); + return GE_EXEC_MODEL_KEY_PATH_INVALID; } ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data); diff --git a/src/ge/graph/load/new_model_manager/cpu_queue_schedule.cc b/src/ge/graph/load/new_model_manager/cpu_queue_schedule.cc index a0011b34..01e1cfa8 100644 --- a/src/ge/graph/load/new_model_manager/cpu_queue_schedule.cc +++ 
b/src/ge/graph/load/new_model_manager/cpu_queue_schedule.cc @@ -16,6 +16,7 @@ #include "graph/load/new_model_manager/cpu_queue_schedule.h" #include "common/debug/ge_log.h" +#include "common/debug/log.h" namespace { const uint32_t kCoreDim = 1; // for rtCpuKernelLaunch @@ -58,7 +59,7 @@ Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) { rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(status); } in_mbuf = reinterpret_cast(args_) + sizeof(MbufQueueInfo); GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) @@ -69,7 +70,7 @@ Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) { status = rtMemcpy(args_, args_size_, &queue_info, sizeof(MbufQueueInfo), RT_MEMCPY_HOST_TO_DEVICE); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(status); } return SUCCESS; @@ -84,7 +85,7 @@ Status CpuTaskModelDequeue::Distribute() { rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelDequeue, kCoreDim, args_, args_size_, nullptr, stream_); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelDequeue failed, status: 0x%X", status); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(status); } GELOGI("Cpu kernel launch model dequeue task success."); @@ -98,24 +99,24 @@ Status CpuTaskModelDequeue::Distribute() { /// @param [in] outside_addrs: model input/output memory addr /// @return: 0 for success / others for failed /// -Status CpuTaskZeroCopy::Init(std::vector &mbuf_list, - std::map> &outside_addrs) { +Status CpuTaskZeroCopy::Init(std::vector &mbuf_list, std::map &outside_addrs) { if ((args_ != nullptr) || (args_size_ > 0)) { GELOGE(FAILED, "Task already initialized, size: %u", args_size_); return FAILED; } args_size_ = sizeof(AddrMapInfo); - 
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); - if (status != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); - return RT_FAILED; - } + GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM)); GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) AddrMapInfo addr_map_info; - for (const auto &addrs : outside_addrs) { - addr_map_info.addr_num += addrs.second.size(); + for (auto &addrs : outside_addrs) { + auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); + GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); + std::map> virtual_args_addrs = addrs_mapping_list[0]; + for (const auto &virtual_args_addr : virtual_args_addrs) { + addr_map_info.addr_num += virtual_args_addr.second.size(); + } } GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num); @@ -123,38 +124,31 @@ Status CpuTaskZeroCopy::Init(std::vector &mbuf_list, size_t index = 0; vector src_addrs; vector dst_addrs; - for (const auto &addrs : outside_addrs) { - for (size_t i = 0; i < addrs.second.size(); ++i) { - src_addrs.push_back(mbuf_list.at(index)); - dst_addrs.push_back(reinterpret_cast(reinterpret_cast(addrs.second.at(i)))); + for (auto &addrs : outside_addrs) { + auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); + GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); + std::map> virtual_args_addrs = addrs_mapping_list[0]; + for (const auto &virtual_args_addr : virtual_args_addrs) { + for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) { + src_addrs.push_back(mbuf_list.at(index)); + dst_addrs.push_back(reinterpret_cast(reinterpret_cast(virtual_args_addr.second.at(i)))); + } } index++; } // malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs - status = rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM); - if (status != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt malloc failed, status: 
0x%x", status); - return RT_FAILED; - } - status = rtMemcpy(src_addr_, src_addrs.size() * sizeof(uint64_t), src_addrs.data(), - src_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); - if (status != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); - return RT_FAILED; - } + GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM)); + rtError_t status = rtMemcpy(src_addr_, src_addrs.size() * sizeof(uint64_t), src_addrs.data(), + src_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); + GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", status); + return RT_ERROR_TO_GE_STATUS(status);) - status = rtMalloc(&dst_addr_, dst_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM); - if (status != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); - return RT_FAILED; - } + GE_CHK_RT_RET(rtMalloc(&dst_addr_, dst_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM)); status = rtMemcpy(dst_addr_, dst_addrs.size() * sizeof(uint64_t), dst_addrs.data(), dst_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); - if (status != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); - return RT_FAILED; - } + GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", status); + return RT_ERROR_TO_GE_STATUS(status);) // src_addr_list is init to src_addr, which is the point to src_addrs if (!src_addrs.empty() && !dst_addrs.empty()) { @@ -164,10 +158,8 @@ Status CpuTaskZeroCopy::Init(std::vector &mbuf_list, } status = rtMemcpy(args_, args_size_, &addr_map_info, sizeof(AddrMapInfo), RT_MEMCPY_HOST_TO_DEVICE); - if (status != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); - return RT_FAILED; - } + GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", status); + return RT_ERROR_TO_GE_STATUS(status);) return SUCCESS; 
} @@ -180,7 +172,7 @@ Status CpuTaskZeroCopy::Distribute() { rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskZeroCopy, kCoreDim, args_, args_size_, nullptr, stream_); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ZeroCopy failed, status: 0x%X", status); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(status); } GELOGI("Cpu kernel launch zero copy task success."); @@ -225,7 +217,7 @@ Status CpuTaskPrepareOutput::Init(uintptr_t addr, uint32_t size, uintptr_t in_mb rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(status); } out_mbuf = reinterpret_cast(args_) + sizeof(PrepareOutputInfo); GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) @@ -239,7 +231,7 @@ Status CpuTaskPrepareOutput::Init(uintptr_t addr, uint32_t size, uintptr_t in_mb status = rtMemcpy(args_, args_size_, &prepare, sizeof(PrepareOutputInfo), RT_MEMCPY_HOST_TO_DEVICE); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(status); } return SUCCESS; @@ -254,7 +246,7 @@ Status CpuTaskPrepareOutput::Distribute() { rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskPrepareOutput, kCoreDim, args_, args_size_, nullptr, stream_); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt CpuKernelLaunch PrepareOutput failed, status: 0x%X", status); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(status); } GELOGI("Cpu kernel launch prepare output task success."); @@ -279,7 +271,7 @@ Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) { rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(status); } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, 
"args data.", args_size_) @@ -289,7 +281,7 @@ Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) { status = rtMemcpy(args_, args_size_, &queue_info, args_size_, RT_MEMCPY_HOST_TO_DEVICE); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(status); } return SUCCESS; @@ -304,7 +296,7 @@ Status CpuTaskModelEnqueue::Distribute() { rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelEnqueue, kCoreDim, args_, args_size_, nullptr, stream_); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelEnqueue failed, status: 0x%X", status); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(status); } GELOGI("Cpu kernel launch model enqueue task success."); @@ -336,7 +328,7 @@ Status CpuTaskActiveEntry::Distribute() { rtError_t ret = rtStreamActive(active_stream_, stream_); if (ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt StreamActive failed, ret: 0x%X", ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(ret); } GELOGI("Cpu kernel launch active entry task success."); @@ -359,14 +351,14 @@ Status CpuTaskWaitEndGraph::Init(uint32_t model_id) { rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(status); } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(status); } return SUCCESS; @@ -381,7 +373,7 @@ Status CpuTaskWaitEndGraph::Distribute() { rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskWaitEndGraph, kCoreDim, args_, args_size_, nullptr, stream_); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt 
CpuKernelLaunch WaitEndGraph failed, status: 0x%X", status); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(status); } GELOGI("Cpu kernel launch wait end task success."); @@ -404,14 +396,14 @@ Status CpuTaskModelRepeat::Init(uint32_t model_id) { rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(status); } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(status); } return SUCCESS; @@ -426,7 +418,7 @@ Status CpuTaskModelRepeat::Distribute() { rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelRepeat, kCoreDim, args_, args_size_, nullptr, stream_); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelRepeat failed, status: 0x%x", status); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(status); } GELOGI("Cpu kernel launch repeat task success."); diff --git a/src/ge/graph/load/new_model_manager/cpu_queue_schedule.h b/src/ge/graph/load/new_model_manager/cpu_queue_schedule.h index c4ae4df5..cea00613 100644 --- a/src/ge/graph/load/new_model_manager/cpu_queue_schedule.h +++ b/src/ge/graph/load/new_model_manager/cpu_queue_schedule.h @@ -22,6 +22,7 @@ #include "common/ge_inner_error_codes.h" #include "graph/load/new_model_manager/task_info/task_info.h" +#include "graph/load/new_model_manager/zero_copy_offset.h" #include "runtime/kernel.h" namespace ge { @@ -93,7 +94,7 @@ class CpuTaskZeroCopy : public CpuTaskInfo { ~CpuTaskZeroCopy() override; Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; } - Status Init(std::vector &mbuf_list, std::map> &outside_addrs); + Status Init(std::vector 
&mbuf_list, std::map &outside_addrs); Status Distribute() override; diff --git a/src/ge/graph/load/new_model_manager/data_dumper.cc b/src/ge/graph/load/new_model_manager/data_dumper.cc index a4fe8898..7194264d 100644 --- a/src/ge/graph/load/new_model_manager/data_dumper.cc +++ b/src/ge/graph/load/new_model_manager/data_dumper.cc @@ -487,8 +487,8 @@ Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_in size_t proto_size = op_mapping_info.ByteSizeLong(); bool ret = op_mapping_info.SerializeToString(&proto_str); if (!ret || proto_size == 0) { - GELOGE(FAILED, "Protobuf SerializeToString failed, proto size %zu.", proto_size); - return FAILED; + GELOGE(PARAM_INVALID, "Protobuf SerializeToString failed, proto size %zu.", proto_size); + return PARAM_INVALID; } if (dev_mem_load_ != nullptr) { @@ -499,20 +499,20 @@ Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_in rtError_t rt_ret = rtMalloc(&dev_mem_load_, proto_size, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "load dump information.", proto_size) rt_ret = rtMemcpy(dev_mem_load_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtDatadumpInfoLoad(dev_mem_load_, proto_size); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } load_flag_ = true; @@ -525,8 +525,8 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_ size_t proto_size = op_mapping_info.ByteSizeLong(); bool ret = op_mapping_info.SerializeToString(&proto_str); if (!ret || proto_size == 0) { - GELOGE(FAILED, "Protobuf 
SerializeToString failed, proto size %zu.", proto_size); - return FAILED; + GELOGE(PARAM_INVALID, "Protobuf SerializeToString failed, proto size %zu.", proto_size); + return PARAM_INVALID; } if (dev_mem_unload_ != nullptr) { @@ -537,20 +537,20 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_ rtError_t rt_ret = rtMalloc(&dev_mem_unload_, proto_size, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "unload dump information.", proto_size) rt_ret = rtMemcpy(dev_mem_unload_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtDatadumpInfoLoad(dev_mem_unload_, proto_size); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } load_flag_ = false; GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size); @@ -588,18 +588,20 @@ Status DataDumper::LoadDumpInfo() { task.mutable_op()->set_op_type(op_desc->GetType()); if (dump_properties_.GetDumpMode() == kDumpOutput) { - if (DumpOutput(op_iter, task) != SUCCESS) { - GELOGE(FAILED, "Dump output failed"); - return FAILED; + Status ret = DumpOutput(op_iter, task); + if (ret != SUCCESS) { + GELOGE(ret, "Dump output failed"); + return ret; } op_mapping_info.mutable_task()->Add(std::move(task)); continue; } if (dump_properties_.GetDumpMode() == kDumpInput) { if (op_iter.is_task) { - if (DumpInput(op_iter, task) != SUCCESS) { - GELOGE(FAILED, "Dump input failed"); - return FAILED; + Status ret = DumpInput(op_iter, task); + if (ret != SUCCESS) { + GELOGE(ret, "Dump input failed"); + return ret; } } 
op_mapping_info.mutable_task()->Add(std::move(task)); @@ -608,14 +610,14 @@ Status DataDumper::LoadDumpInfo() { if (dump_properties_.GetDumpMode() == kDumpAll) { auto ret = DumpOutput(op_iter, task); if (ret != SUCCESS) { - GELOGE(FAILED, "Dump output failed when in dumping all"); - return FAILED; + GELOGE(ret, "Dump output failed when in dumping all"); + return ret; } if (op_iter.is_task) { ret = DumpInput(op_iter, task); if (ret != SUCCESS) { - GELOGE(FAILED, "Dump input failed when in dumping all"); - return FAILED; + GELOGE(ret, "Dump input failed when in dumping all"); + return ret; } } op_mapping_info.mutable_task()->Add(std::move(task)); @@ -630,8 +632,8 @@ Status DataDumper::LoadDumpInfo() { if (!op_list_.empty() || is_op_debug_) { auto ret = ExecuteLoadDumpInfo(op_mapping_info); if (ret != SUCCESS) { - GELOGE(FAILED, "Execute load dump info failed"); - return FAILED; + GELOGE(ret, "Execute load dump info failed"); + return ret; } } return SUCCESS; @@ -702,8 +704,8 @@ Status DataDumper::UnloadDumpInfo() { } auto ret = ExecuteUnLoadDumpInfo(op_mapping_info); if (ret != SUCCESS) { - GELOGE(FAILED, "Execute unload dump info failed"); - return FAILED; + GELOGE(ret, "Execute unload dump info failed"); + return ret; } return SUCCESS; } diff --git a/src/ge/graph/load/new_model_manager/davinci_model.cc b/src/ge/graph/load/new_model_manager/davinci_model.cc index c43c37eb..c3eb7247 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.cc +++ b/src/ge/graph/load/new_model_manager/davinci_model.cc @@ -36,6 +36,7 @@ #include "common/scope_guard.h" #include "common/thread_pool.h" #include "framework/common/debug/ge_log.h" +#include "graph/common/ge_call_wrapper.h" #include "graph/compute_graph.h" #include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" @@ -46,7 +47,6 @@ #include "graph/manager/graph_var_manager.h" #include "graph/manager/trans_var_data_utils.h" #include "graph/manager/util/debug.h" -#include "graph/common/ge_call_wrapper.h" 
#include "graph/model_serialize.h" #include "graph/node.h" #include "graph/utils/graph_utils.h" @@ -58,8 +58,8 @@ #include "runtime/dev.h" #include "runtime/event.h" #include "runtime/mem.h" -#include "runtime/stream.h" #include "runtime/rt_model.h" +#include "runtime/stream.h" #include "securec.h" // create std::thread, catch exceptions using try/catch @@ -149,7 +149,6 @@ DavinciModel::~DavinciModel() { saved_task_addrs_.clear(); GE_CHK_STATUS(ModelRunStop()); - UnbindTaskSinkStream(); op_list_.clear(); data_op_list_.clear(); @@ -157,32 +156,37 @@ DavinciModel::~DavinciModel() { tensor_name_to_fixed_addr_size_.clear(); tensor_name_to_peer_output_index_.clear(); GE_DELETE_NEW_SINGLE(data_inputer_); + // check rt ctx is exist. rt api call will cause error log when ctx not exist + rtContext_t ctx = nullptr; + rtError_t rt_ret = rtCtxGetCurrent(&ctx); + if (rt_ret == RT_ERROR_NONE) { + UnbindTaskSinkStream(); + for (size_t i = 0; i < label_list_.size(); ++i) { + if (label_list_[i] != nullptr) { + GE_LOGW_IF(rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE, "Destroy label failed, index: %zu", i); + } + } - for (size_t i = 0; i < label_list_.size(); ++i) { - if (label_list_[i] != nullptr) { - GE_LOGW_IF(rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE, "Destroy label failed, index: %zu", i); + for (size_t i = 0; i < stream_list_.size(); ++i) { + GE_LOGW_IF(rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE, "Destroy stream failed, index: %zu", i); } - } - for (size_t i = 0; i < stream_list_.size(); ++i) { - GE_LOGW_IF(rtStreamDestroy(stream_list_[i]) != RT_ERROR_NONE, "Destroy stream failed, index: %zu", i); - } + for (size_t i = 0; i < event_list_.size(); ++i) { + GE_LOGW_IF(rtEventDestroy(event_list_[i]) != RT_ERROR_NONE, "Destroy event failed, index: %zu", i); + } - for (size_t i = 0; i < event_list_.size(); ++i) { - GE_LOGW_IF(rtEventDestroy(event_list_[i]) != RT_ERROR_NONE, "Destroy event failed, index: %zu", i); - } + FreeWeightsMem(); - FreeWeightsMem(); + 
FreeFeatureMapMem(); - FreeFeatureMapMem(); + if (rt_model_handle_ != nullptr) { + GE_CHK_RT(rtModelDestroy(rt_model_handle_)); + rt_model_handle_ = nullptr; + } + } OpDebugUnRegister(); - if (rt_model_handle_ != nullptr) { - GE_CHK_RT(rtModelDestroy(rt_model_handle_)); - rt_model_handle_ = nullptr; - } - GELOGI("do ReleaseTask"); ReleaseTask(); CleanTbeHandle(); @@ -192,9 +196,7 @@ DavinciModel::~DavinciModel() { if (args_ != nullptr) { GE_CHK_RT(rtFree(args_)); } - if (args_host_ != nullptr) { - GE_CHK_RT(rtFreeHost(args_host_)); - } + total_io_addrs_.clear(); if (fixed_addrs_ != nullptr) { GE_CHK_RT(rtFree(fixed_addrs_)); } @@ -257,7 +259,7 @@ void DavinciModel::Shrink() { Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { if (is_model_has_inited_) { - GELOGI("call InitModelMem more than once ."); + GELOGE(FAILED, "call InitModelMem more than once ."); return FAILED; } is_model_has_inited_ = true; @@ -285,7 +287,8 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p if (TotalMemSize() && mem_base_ == nullptr) { mem_base_ = MallocFeatureMapMem(data_size); if (mem_base_ == nullptr) { - return FAILED; + GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size); + return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED; } GELOGI("[IMAS]InitModelMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, mem_base_, data_size); @@ -302,7 +305,8 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p if (weight_ptr == nullptr) { weights_mem_base_ = MallocWeightsMem(weights_size); if (weights_mem_base_ == nullptr) { - return FAILED; + GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. 
size: %zu", weights_size); + return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED; } is_inner_weight_base_ = true; } @@ -312,7 +316,7 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p GELOGI("copy weights data to device"); } - GE_CHK_STATUS_RET(InitVariableMem(), "init variable mem failed."); + GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed."); runtime_param_.mem_base = mem_base_; runtime_param_.weight_base = weights_mem_base_; return SUCCESS; @@ -324,7 +328,7 @@ Status DavinciModel::InitVariableMem() { if (TotalVarMemSize() && var_mem_base_ == nullptr) { Status ret = VarManager::Instance(session_id_)->MallocVarMemory(TotalVarMemSize()); if (ret != SUCCESS) { - GELOGE(ret, "Malloc Var Memory Fail."); + GELOGE(ret, "Malloc variable memory failed."); return ret; } var_mem_base_ = VarManager::Instance(session_id_)->GetVarMemoryBase(RT_MEMORY_HBM); @@ -456,7 +460,7 @@ Status DavinciModel::SetTSDevice() { rtError_t rt_ret = rtSetTSDevice(core_type); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "SetTSDevice failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } return SUCCESS; } @@ -470,7 +474,7 @@ Status DavinciModel::OpDebugRegister() { rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } uint64_t debug_addrs_tmp = static_cast(reinterpret_cast(op_debug_addr_)); @@ -479,12 +483,12 @@ Status DavinciModel::OpDebugRegister() { rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "rtMemcpy to p2p_addr 
error: 0x%X", rt_ret); - return FAILED; + GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } uint32_t op_debug_mode = 0; @@ -495,7 +499,7 @@ Status DavinciModel::OpDebugRegister() { rt_ret = rtDebugRegister(rt_model_handle_, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } GELOGI("debug_task_id:%d, debug_stream_id:%u", debug_task_id, debug_stream_id); is_op_debug_reg_ = true; @@ -510,7 +514,6 @@ void DavinciModel::OpDebugUnRegister() { GELOGI("OpDebugUnRegister, is_op_debug_reg_ = %d", is_op_debug_reg_); if (is_op_debug_reg_) { debug_reg_mutex_.unlock(); - rtError_t rt_ret = RT_ERROR_NONE; if (rt_model_handle_ != nullptr) { rt_ret = rtDebugUnRegister(rt_model_handle_); @@ -534,10 +537,8 @@ void DavinciModel::OpDebugUnRegister() { } p2p_debug_addr_ = nullptr; } - is_op_debug_reg_ = false; } - return; } @@ -615,7 +616,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size if (!known_node_) { GE_CHK_STATUS_RET_NOLOG(InitModelMem(dev_ptr, mem_size, weight_ptr, weight_size)); data_inputer_ = new (std::nothrow) DataInputer(); - GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, INTERNAL_ERROR, "data_inputer_ is nullptr."); + GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr."); } GE_TIMESTAMP_END(InitModelMem, "GraphLoader::InitModelMem"); @@ -640,9 +641,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size GELOGI("Infer profiling: op_name_size(%zu)", op_name.size()); } - if (InitNodes(compute_graph) != SUCCESS) { - return FAILED; - } + GE_CHK_STATUS_RET(InitNodes(compute_graph), "Init nodes failed"); SetDataDumperArgs(compute_graph); GE_TIMESTAMP_START(DoTaskSink); @@ -812,9 +811,9 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr 
&compute_graph) { rtError_t rt_ret = rtMemcpy(addr, size, tensor_device_addrs.data(), size, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "rtMemcpy error"); + GELOGE(RT_FAILED, "rtMemcpy error, ret: 0x%X", rt_ret); GE_CHK_RT(rtFree(addr)); - return FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } saved_task_addrs_.emplace(op_desc, addr); } @@ -831,9 +830,10 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { op_desc->GetName().c_str(), op_desc->GetType().c_str()); continue;); - if (InitTbeHandle(op_desc) != SUCCESS) { - GELOGE(PARAM_INVALID, "TBE init failed. %s", op_desc->GetName().c_str()); - return PARAM_INVALID; + Status status = InitTbeHandle(op_desc); + if (status != SUCCESS) { + GELOGE(status, "TBE init failed. %s", op_desc->GetName().c_str()); + return status; } } GE_TIMESTAMP_ADD(InitTbeHandle); @@ -876,18 +876,35 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index) { // Make information for copy input data. 
const vector output_size_list = ModelUtils::GetOutputSize(op_desc); const vector virtual_addr_list = ModelUtils::GetOutputDataAddrs(runtime_param_, op_desc); - if (output_size_list.empty() || virtual_addr_list.empty() || (output_size_list.size() != virtual_addr_list.size())) { - GELOGE(PARAM_INVALID, "Data[%s] init failed: Output size is %zu, Output addr is %zu", op_desc->GetName().c_str(), - output_size_list.size(), virtual_addr_list.size()); + const vector output_offset_list = op_desc->GetOutputOffset(); + if (output_offset_list.size() != virtual_addr_list.size()) { + GELOGE(PARAM_INVALID, "virtual_addr size:%zu should be equal to offset size:%zu.", virtual_addr_list.size(), + output_offset_list.size()); return PARAM_INVALID; } - auto data_index = data_op_index; if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { GELOGI("ge_train: get new index %u, old %u", data_index, data_op_index); } - input_data_info_[data_index] = {output_size_list[kDataIndex], virtual_addr_list[kDataIndex]}; - SetInputOutsideAddr(virtual_addr_list); + bool fusion_flag = false; + ZeroCopyOffset zero_copy_offset; + Status ret = zero_copy_offset.InitInputDataInfo(output_size_list, virtual_addr_list, op_desc, fusion_flag); + if (ret != SUCCESS) { + GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str()); + return PARAM_INVALID; + } + new_input_data_info_[data_index] = zero_copy_offset; + + for (size_t index = 0; index < virtual_addr_list.size(); ++index) { + void *addr = virtual_addr_list.at(index); + if (new_input_outside_addrs_.find(addr) != new_input_outside_addrs_.end()) { + continue; + } + zero_copy_offset.SetInputOutsideAddrs(output_offset_list, addr, index, fusion_flag, real_virtual_addrs_); + new_input_outside_addrs_[addr] = zero_copy_offset; + } + + GELOGI("SetInputOutsideAddr success."); data_op_index++; if (InitInputZeroCopyNodes(node) != SUCCESS) { GELOGE(PARAM_INVALID, "Input zero copy nodes init failed!"); @@ -936,6 +953,7 @@ Status 
DavinciModel::InitInputZeroCopyNodes(const NodePtr &node) { Status DavinciModel::InitNetOutput(const NodePtr &node) { // node->GetOpDesc Checked by Init: NetOutput, valid. auto op_desc = node->GetOpDesc(); + // excludes the function op sub graph, e.g. case,if if (known_node_) { output_op_list_.push_back(op_desc); return SUCCESS; } @@ -952,6 +970,11 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { // Make information for copy output data. const vector input_size_list = ModelUtils::GetInputSize(op_desc); const vector virtual_addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); + const vector input_offset_list = op_desc->GetInputOffset(); + if (input_offset_list.size() != virtual_addr_list.size()) { + GELOGE(PARAM_INVALID, "virtual_addr size should be equal to offset size."); + return PARAM_INVALID; + } if (input_size_list.empty() && virtual_addr_list.empty()) { GELOGI("NetOutput[%s] is empty.", op_desc->GetName().c_str()); return SUCCESS; @@ -962,20 +985,33 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { return PARAM_INVALID; } - size_t num = output_data_info_.size(); + size_t num = new_output_data_info_.size(); + bool fusion_flag = false; + for (size_t idx = 0; idx < input_size_list.size(); ++idx) { - int64_t size = input_size_list[idx]; - auto tensor_desc = op_desc->GetInputDescPtr(idx); - if ((tensor_desc == nullptr) || (TensorUtils::GetTensorSizeInBytes(*tensor_desc, size) != GRAPH_SUCCESS)) { - GELOGE(FAILED, "GetTensorSizeInBytes failed!"); - return FAILED; + ZeroCopyOffset zero_copy_offset; + Status ret = zero_copy_offset.InitOutputDataInfo(input_size_list, virtual_addr_list, op_desc, idx, fusion_flag); + if (ret != SUCCESS) { + GELOGE(PARAM_INVALID, "InitDataInfo of output_info %s failed.", op_desc->GetName().c_str()); + return PARAM_INVALID; } - - GELOGI("Tensor data size: GetSize=%ld, GetTensorSizeInBytes=%ld", input_size_list[idx], size); - output_data_info_[num + idx] = {size, virtual_addr_list[idx]}; + 
new_output_data_info_[num + idx] = zero_copy_offset; + void *addr = virtual_addr_list.at(idx); + int64_t input_offset = input_offset_list.at(idx); + if (new_output_outside_addrs_.find(addr) != new_output_outside_addrs_.end()) { + continue; + } + vector tensor_addrs; + zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs); + new_output_outside_addrs_[addr] = zero_copy_offset; + for (size_t i = 0; i < tensor_addrs.size(); ++i) { + void *real_addr = tensor_addrs.at(i); + DisableZeroCopy(real_addr); + real_virtual_addrs_.emplace_back(real_addr); + } + GELOGI("SetOutputOutsideAddr success."); } - SetOutputOutsideAddr(virtual_addr_list); if (InitOutputZeroCopyNodes(node) != SUCCESS) { GELOGE(PARAM_INVALID, "Output zero copy nodes init failed!"); return PARAM_INVALID; @@ -1082,8 +1118,8 @@ Status DavinciModel::InitVariable(const OpDescPtr &op_desc) { Status DavinciModel::SetQueIds(const std::vector &input_queue_ids, const std::vector &output_queue_ids) { if (input_queue_ids.empty() && output_queue_ids.empty()) { - GELOGE(PARAM_INVALID, "Param is empty"); - return PARAM_INVALID; + GELOGE(GE_EXEC_MODEL_QUEUE_ID_INVALID, "Param is empty"); + return GE_EXEC_MODEL_QUEUE_ID_INVALID; } input_queue_ids_ = input_queue_ids; @@ -1103,26 +1139,26 @@ Status DavinciModel::LoadWithQueue() { return SUCCESS; } - if (input_queue_ids_.size() != input_data_info_.size()) { - GELOGE(PARAM_INVALID, "Input queue ids not match model: input_queue=%zu input_data=%zu", input_queue_ids_.size(), - input_data_info_.size()); - return PARAM_INVALID; + if (input_queue_ids_.size() != new_input_data_info_.size()) { + GELOGE(GE_EXEC_MODEL_QUEUE_ID_INVALID, "Input queue ids not match model: input_queue=%zu input_data=%zu", + input_queue_ids_.size(), new_input_data_info_.size()); + return GE_EXEC_MODEL_QUEUE_ID_INVALID; } - if (output_queue_ids_.size() != output_data_info_.size()) { - GELOGE(PARAM_INVALID, "Output queue ids not match model: output_queue=%zu output_data=%zu", - 
output_queue_ids_.size(), output_data_info_.size()); - return PARAM_INVALID; + if (output_queue_ids_.size() != new_output_data_info_.size()) { + GELOGE(GE_EXEC_MODEL_QUEUE_ID_INVALID, "Output queue ids not match model: output_queue=%zu output_data=%zu", + output_queue_ids_.size(), new_output_data_info_.size()); + return GE_EXEC_MODEL_QUEUE_ID_INVALID; } GE_CHK_STATUS_RET(AddHeadStream(), "Add head stream failed."); // Binding input_queue and Data Op. GE_CHK_STATUS_RET(BindInputQueue(), "Launch bind input queue failed."); - GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, input_outside_addrs_), "Launch zero copy failed."); + GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, new_input_outside_addrs_), "Launch zero copy failed."); // Binding output_queue and NetOutput Op. GE_CHK_STATUS_RET(BindOutputQueue(), "Launch bind output queue failed."); - GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, output_outside_addrs_), "Launch zero copy failed."); + GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, new_output_outside_addrs_), "Launch zero copy failed."); GE_CHK_STATUS_RET(CpuActiveStream(), "Launch active entry stream failed."); GE_CHK_STATUS_RET(CpuWaitEndGraph(), "Launch wait end graph failed."); @@ -1138,20 +1174,26 @@ Status DavinciModel::LoadWithQueue() { Status DavinciModel::BindInputQueue() { // Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size() for (size_t i = 0; i < input_queue_ids_.size(); ++i) { - auto it = input_data_info_.find(i); - if (it == input_data_info_.end()) { - GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", input_data_info_.size(), i); + auto it = new_input_data_info_.find(i); + if (it == new_input_data_info_.end()) { + GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", new_input_data_info_.size(), i); return FAILED; } uint32_t queue_id = input_queue_ids_[i]; - uint32_t data_size = static_cast(it->second.first); - uintptr_t 
data_addr = reinterpret_cast(it->second.second); + if (it->second.GetDataInfo().empty()) { + GELOGE(INTERNAL_ERROR, "the %zu input_queue not set data_info.", i); + return INTERNAL_ERROR; + } + uint32_t data_size = static_cast(it->second.GetDataInfo().at(0).first); + uintptr_t data_addr = reinterpret_cast(it->second.GetDataInfo().at(0).second); GELOGI("BindInputToQueue: graph_%u index[%zu] queue id[%u] output addr[0x%lx] output size[%u]", runtime_param_.graph_id, i, queue_id, data_addr, data_size); - if (rtModelBindQueue(rt_model_handle_, queue_id, RT_MODEL_INPUT_QUEUE) != RT_ERROR_NONE) { - return INTERNAL_ERROR; + rtError_t rt_ret = rtModelBindQueue(rt_model_handle_, queue_id, RT_MODEL_INPUT_QUEUE); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rtModelBindQueue failed, ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } if (CpuModelDequeue(queue_id) != SUCCESS) { @@ -1170,14 +1212,15 @@ Status DavinciModel::CpuModelDequeue(uint32_t queue_id) { GELOGI("Set CpuKernel model dequeue task enter."); std::shared_ptr dequeue_task = MakeShared(rt_entry_stream_); if (dequeue_task == nullptr) { - GELOGE(FAILED, "Make CpuTaskModelDequeue task failed."); - return FAILED; + GELOGE(MEMALLOC_FAILED, "Make CpuTaskModelDequeue task failed."); + return MEMALLOC_FAILED; } // Get DataOp Output address and bind to queue. 
uintptr_t in_mbuf = 0; - if (dequeue_task->Init(queue_id, in_mbuf) != SUCCESS) { - return FAILED; + Status status = dequeue_task->Init(queue_id, in_mbuf); + if (status != SUCCESS) { + return status; } cpu_task_list_.push_back(dequeue_task); @@ -1187,16 +1230,18 @@ Status DavinciModel::CpuModelDequeue(uint32_t queue_id) { } Status DavinciModel::CpuTaskModelZeroCopy(std::vector &mbuf_list, - std::map> &outside_addrs) { + std::map &outside_addrs) { GELOGI("Set CpuKernel model zero_copy task enter."); std::shared_ptr zero_copy = MakeShared(rt_entry_stream_); if (zero_copy == nullptr) { - GELOGE(FAILED, "Make CpuTaskZeroCopy task failed."); - return FAILED; + GELOGE(MEMALLOC_FAILED, "Make CpuTaskZeroCopy task failed."); + return MEMALLOC_FAILED; } - if (zero_copy->Init(mbuf_list, outside_addrs) != SUCCESS) { - return FAILED; + // mdc zero_copy not support l2 fusion + Status status = zero_copy->Init(mbuf_list, outside_addrs); + if (status != SUCCESS) { + return status; } cpu_task_list_.push_back(zero_copy); GELOGI("Set CpuKernel model zero_copy task success."); @@ -1209,23 +1254,31 @@ Status DavinciModel::CpuTaskModelZeroCopy(std::vector &mbuf_list, Status DavinciModel::BindOutputQueue() { // Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size() for (size_t i = 0; i < output_queue_ids_.size(); ++i) { - auto it = output_data_info_.find(i); - if (it == output_data_info_.end()) { - GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i); + auto it = new_output_data_info_.find(i); + if (it == new_output_data_info_.end()) { + GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i); return FAILED; } uint32_t queue_id = output_queue_ids_[i]; - uint32_t data_size = static_cast(it->second.first); - uintptr_t data_addr = reinterpret_cast(it->second.second); + if (it->second.GetDataInfo().empty()) { + GELOGE(INTERNAL_ERROR, "the %zu output_queue not 
set data_info.", i); + return INTERNAL_ERROR; + } + uint32_t data_size = static_cast(it->second.GetDataInfo().at(0).first); + uintptr_t data_addr = reinterpret_cast(it->second.GetDataInfo().at(0).second); GELOGI("BindOutputToQueue: graph_%u index[%zu] queue id[%u] input addr[0x%lx] input size[%u]", runtime_param_.graph_id, i, queue_id, data_addr, data_size); - if (rtModelBindQueue(rt_model_handle_, queue_id, RT_MODEL_OUTPUT_QUEUE) != RT_ERROR_NONE) { - return INTERNAL_ERROR; + rtError_t rt_ret = rtModelBindQueue(rt_model_handle_, queue_id, RT_MODEL_OUTPUT_QUEUE); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rtModelBindQueue failed, ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } - if (CpuModelPrepareOutput(data_addr, data_size) != SUCCESS) { - return INTERNAL_ERROR; + + Status status = CpuModelPrepareOutput(data_addr, data_size); + if (status != SUCCESS) { + return status; } } @@ -1246,8 +1299,8 @@ Status DavinciModel::CpuModelPrepareOutput(uintptr_t addr, uint32_t size) { std::shared_ptr prepare_output = MakeShared(rt_entry_stream_); if (prepare_output == nullptr) { - GELOGE(FAILED, "Make CpuTaskPrepareOutput task failed."); - return FAILED; + GELOGE(MEMALLOC_FAILED, "Make CpuTaskPrepareOutput task failed."); + return MEMALLOC_FAILED; } uintptr_t out_mbuf = 0; @@ -1270,12 +1323,13 @@ Status DavinciModel::CpuActiveStream() { GELOGI("Set CpuKernel active stream task enter."); std::shared_ptr active_entry = MakeShared(rt_entry_stream_); if (active_entry == nullptr) { - GELOGE(FAILED, "Make CpuTaskActiveEntry task failed."); - return FAILED; + GELOGE(MEMALLOC_FAILED, "Make CpuTaskActiveEntry task failed."); + return MEMALLOC_FAILED; } - if (active_entry->Init(rt_head_stream_) != SUCCESS) { - return FAILED; + Status status = active_entry->Init(rt_head_stream_); + if (status != SUCCESS) { + return status; } cpu_task_list_.push_back(active_entry); @@ -1290,12 +1344,13 @@ Status DavinciModel::CpuWaitEndGraph() { GELOGI("Set CpuKernel wait end 
graph task enter."); std::shared_ptr wait_endgraph = MakeShared(rt_entry_stream_); if (wait_endgraph == nullptr) { - GELOGE(FAILED, "Make CpuTaskWaitEndGraph task failed."); - return FAILED; + GELOGE(MEMALLOC_FAILED, "Make CpuTaskWaitEndGraph task failed."); + return MEMALLOC_FAILED; } - if (wait_endgraph->Init(runtime_model_id_) != SUCCESS) { - return FAILED; + Status status = wait_endgraph->Init(runtime_model_id_); + if (status != SUCCESS) { + return status; } cpu_task_list_.push_back(wait_endgraph); @@ -1305,9 +1360,9 @@ Status DavinciModel::CpuWaitEndGraph() { Status DavinciModel::BindEnqueue() { for (size_t i = 0; i < output_queue_ids_.size(); ++i) { - auto it = output_data_info_.find(i); - if (it == output_data_info_.end()) { - GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i); + auto it = new_output_data_info_.find(i); + if (it == new_output_data_info_.end()) { + GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i); return FAILED; } @@ -1323,12 +1378,13 @@ Status DavinciModel::CpuModelEnqueue(uint32_t queue_id, uintptr_t out_mbuf) { GELOGI("Set CpuKernel model enqueue task enter."); std::shared_ptr model_enqueue = MakeShared(rt_entry_stream_); if (model_enqueue == nullptr) { - GELOGE(FAILED, "Make CpuTaskModelEnqueue task failed."); - return FAILED; + GELOGE(MEMALLOC_FAILED, "Make CpuTaskModelEnqueue task failed."); + return MEMALLOC_FAILED; } - if (model_enqueue->Init(queue_id, out_mbuf) != SUCCESS) { - return FAILED; + Status status = model_enqueue->Init(queue_id, out_mbuf); + if (status != SUCCESS) { + return status; } cpu_task_list_.push_back(model_enqueue); GELOGI("Set CpuKernel model enqueue task enter."); @@ -1342,12 +1398,13 @@ Status DavinciModel::CpuModelRepeat() { GELOGI("Set CpuKernel repeat task enter."); std::shared_ptr model_repeat = MakeShared(rt_entry_stream_); if (model_repeat == nullptr) { - GELOGE(FAILED, "Make CpuTaskModelRepeat task 
failed."); - return FAILED; + GELOGE(MEMALLOC_FAILED, "Make CpuTaskModelRepeat task failed."); + return MEMALLOC_FAILED; } - if (model_repeat->Init(runtime_model_id_) != SUCCESS) { - return FAILED; + Status status = model_repeat->Init(runtime_model_id_); + if (status != SUCCESS) { + return status; } cpu_task_list_.push_back(model_repeat); @@ -1390,15 +1447,29 @@ Status DavinciModel::GetInputOutputDescInfo(vector &input_d /// @ingroup ge /// @brief Get dynamic batch_info /// @param [out] batch_info +/// @param [out] dynamic_type /// @return execute result /// -Status DavinciModel::GetDynamicBatchInfo(std::vector> &batch_info) const { +Status DavinciModel::GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) const { + dynamic_type = dynamic_type_; batch_info = batch_info_; + return SUCCESS; } /// /// @ingroup ge +/// @brief Get combined dynamic dims info +/// @param [out] batch_info +/// @return None +/// +void DavinciModel::GetCombinedDynamicDims(std::vector> &batch_info) const { + batch_info.clear(); + batch_info = combined_batch_info_; +} + +/// +/// @ingroup ge /// @brief Get AIPP input info /// @param [in] index /// @param [out] aipp_info @@ -1432,7 +1503,7 @@ Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) { return SUCCESS; } -void DavinciModel::SetDynamicSize(const std::vector &batch_num) { +void DavinciModel::SetDynamicSize(const std::vector &batch_num, int32_t dynamic_type) { batch_size_.clear(); if (batch_num.empty()) { GELOGD("User has not set dynammic data"); @@ -1440,9 +1511,11 @@ void DavinciModel::SetDynamicSize(const std::vector &batch_num) { for (size_t i = 0; i < batch_num.size(); i++) { batch_size_.emplace_back(batch_num[i]); } + + dynamic_type_ = dynamic_type; } -void DavinciModel::GetCurShape(std::vector &batch_info) { +void DavinciModel::GetCurShape(std::vector &batch_info, int32_t &dynamic_type) { if (batch_size_.empty()) { GELOGD("User does not set dynamic size"); } @@ -1450,6 +1523,8 @@ void 
DavinciModel::GetCurShape(std::vector &batch_info) { GELOGI("Start to get current shape"); batch_info.emplace_back(batch_size_[i]); } + + dynamic_type = dynamic_type_; } void DavinciModel::GetModelAttr(std::vector &dynamic_output_shape_info) { @@ -1655,27 +1730,38 @@ ge::Format DavinciModel::GetFormat() { Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) { rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE; const std::vector &blobs = input_data.blobs; - for (const auto &data : input_data_info_) { + for (const auto &data : new_input_data_info_) { if (data.first >= blobs.size()) { GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(), - input_data_info_.size(), data.first, data.second.first); + new_input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first); return FAILED; } const DataBuffer &data_buf = blobs[data.first]; - void *mem_addr = data.second.second; - uint64_t mem_size = static_cast(data.second.first); - GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length, PARAM_INVALID, - "input data size(%lu) does not match model required size(%lu), ret failed.", data_buf.length, - mem_size); - - GELOGI("[IMAS]CopyPlainData memcpy graph_%lu type[F] input[%lu] dst[%p] src[%p] mem_size[%lu] datasize[%lu]", - runtime_param_.graph_id, data.first, mem_addr, data_buf.data, mem_size, data_buf.length); if (data_buf.length == 0) { GELOGW("No data need to memcpy!"); return SUCCESS; } - GE_CHK_RT_RET(rtMemcpy(mem_addr, mem_size, data_buf.data, data_buf.length, kind)); + uint64_t data_size = data.second.GetDataSize(); + GE_CHK_BOOL_RET_STATUS(data_size >= data_buf.length, PARAM_INVALID, + "input data size(%lu) does not match model required size(%lu), ret failed.", data_buf.length, + data_size); + + for (size_t i = 0; i < data.second.GetDataCount(); ++i) { + void *mem_addr = data.second.GetDataInfo().at(i).second; + uint64_t mem_size = 
static_cast(data.second.GetDataInfo().at(i).first); + void *data_buf_addr = + reinterpret_cast(reinterpret_cast(data_buf.data) + data.second.GetRelativeOffset().at(i)); + + uint64_t data_buf_length = data_buf.length; + GELOGI( + "[IMAS]CopyPlainData memcpy graph_%lu type[F] input[%lu] dst[%p] src[%p] mem_size[%lu] datasize[%lu] " + "relative_offset is %ld", + runtime_param_.graph_id, data.first, mem_addr, data_buf_addr, mem_size, data_buf_length, + data.second.GetRelativeOffset().at(i)); + + GE_CHK_RT_RET(rtMemcpy(mem_addr, mem_size, data_buf_addr, data_buf_length, kind)); + } } return SUCCESS; @@ -2020,31 +2106,26 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r output_data.index = data_id; output_data.model_id = model_id_; - if (output_data.blobs.size() != output_data_info_.size()) { + if (output_data.blobs.size() != new_output_data_info_.size()) { GELOGE(FAILED, "Output data buffer num=%zu not equal model data num=%zu", output_data.blobs.size(), - output_data_info_.size()); + new_output_data_info_.size()); return FAILED; } std::vector &blobs = output_data.blobs; - for (const auto &output : output_data_info_) { + for (const auto &output : new_output_data_info_) { if (output.first >= blobs.size()) { GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(), - input_data_info_.size(), output.first, output.second.first); + new_input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first); return FAILED; } - if ((kind == RT_MEMCPY_DEVICE_TO_DEVICE) && (copy_only_addrs_.count(output.second.second) == 0)) { - continue; // Skip: Feed by zero copy. - } - DataBuffer &buffer = blobs[output.first]; - uint64_t mem_size = static_cast(output.second.first); + uint64_t mem_size = static_cast(output.second.GetDataSize()); if ((buffer.length == 0) || (mem_size == 0)) { GELOGI("Length of data is zero, No need copy. 
output tensor index=%u", output.first); continue; } - if (buffer.length < mem_size) { GELOGE(FAILED, "Tensor data size=%lu, buffer size=%u", mem_size, buffer.length); return FAILED; @@ -2052,9 +2133,24 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r GELOGW("Tensor data size=%lu, buffer size=%u", mem_size, buffer.length); } - GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%u]", - runtime_param_.graph_id, output.first, output.second.second, mem_size, buffer.length); - GE_CHK_RT_RET(rtMemcpy(buffer.data, buffer.length, output.second.second, mem_size, kind)); + for (size_t i = 0; i < output.second.GetDataCount(); ++i) { + if ((kind == RT_MEMCPY_DEVICE_TO_DEVICE) && + (copy_only_addrs_.count(output.second.GetDataInfo().at(i).second) == 0)) { + continue; // Skip: Feed by zero copy. + } + + uint64_t data_size = output.second.GetDataInfo().at(i).first; + uint64_t buffer_length = buffer.length; + void *buffer_addr = + reinterpret_cast(reinterpret_cast(buffer.data) + output.second.GetRelativeOffset().at(i)); + + GELOGI( + "[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%u] " + "relative_offset is %ld", + runtime_param_.graph_id, output.first, output.second.GetDataInfo().at(i).second, data_size, buffer_length, + output.second.GetRelativeOffset().at(i)); + GE_CHK_RT_RET(rtMemcpy(buffer_addr, buffer_length, output.second.GetDataInfo().at(i).second, data_size, kind)); + } } DumpOpInputOutput(); @@ -2519,25 +2615,25 @@ void DavinciModel::UnbindTaskSinkStream() { Status DavinciModel::CreateKnownZeroCopyMap(const vector &inputs, const vector &outputs) { GELOGI("DavinciModel::CreateKnownZeroCopyMap in."); - if (inputs.size() != data_op_list_.size()) { - GELOGE(FAILED, "input data addr %u is not equal to input op number %u.", inputs.size(), data_op_list_.size()); + if (inputs.size() > data_op_list_.size()) { + GELOGE(FAILED, "input data addr %u should less 
than input op number %u.", inputs.size(), data_op_list_.size()); return FAILED; } // remove zero copy addr in last iteration knonw_input_data_info_.clear(); knonw_output_data_info_.clear(); - for (size_t i = 0; i < data_op_list_.size(); ++i) { + for (size_t i = 0; i < inputs.size(); ++i) { const vector addr_list = ModelUtils::GetOutputDataAddrs(runtime_param_, data_op_list_[i]); knonw_input_data_info_[addr_list[kDataIndex]] = inputs[i]; GELOGI("DavinciModel::CreateKnownZeroCopyMap input %d,v addr %p,p addr %p .", i, addr_list[kDataIndex], inputs[i]); } - if (output_op_list_.size() != kOutputNum) { - GELOGE(FAILED, "output op num is %u, not equal %u.", outputs.size(), kOutputNum); + if (output_op_list_.size() < kOutputNum) { + GELOGE(FAILED, "output op num is %u, not less than %u.", outputs.size(), kOutputNum); return FAILED; } const vector addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, output_op_list_[kDataIndex]); - if (outputs.size() != addr_list.size()) { - GELOGE(FAILED, "output data addr %u is not equal to output op number %u.", outputs.size(), addr_list.size()); + if (outputs.size() > addr_list.size()) { + GELOGE(FAILED, "output data addr %u should less than output op number %u.", outputs.size(), addr_list.size()); return FAILED; } for (size_t i = 0; i < addr_list.size(); ++i) { @@ -2548,30 +2644,20 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector &inputs, const return SUCCESS; } -Status DavinciModel::UpdateKnownZeroCopyAddr(vector &io_addrs, uint32_t args_offset) { - for (size_t i = 0; i < io_addrs.size(); ++i) { - auto it_in = knonw_input_data_info_.find(io_addrs[i]); +Status DavinciModel::UpdateKnownZeroCopyAddr() { + for (size_t i = 0; i < total_io_addrs_.size(); ++i) { + auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]); if (it_in != knonw_input_data_info_.end()) { - GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %d,v addr %p,p addr %p .", i, io_addrs[i], - knonw_input_data_info_.at(io_addrs[i])); - io_addrs[i] = 
knonw_input_data_info_.at(io_addrs[i]); + GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %d,v addr %p,p addr %p .", i, total_io_addrs_[i], + knonw_input_data_info_.at(total_io_addrs_[i])); + total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]); } - auto it_out = knonw_output_data_info_.find(io_addrs[i]); + auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]); if (it_out != knonw_output_data_info_.end()) { - GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %d,v addr %p,p addr %p .", i, io_addrs[i], - knonw_output_data_info_.at(io_addrs[i])); - io_addrs[i] = knonw_output_data_info_.at(io_addrs[i]); - } - } - // may args_size is equal to src_args_size? - uint32_t src_args_size = io_addrs.size() * sizeof(uint64_t); - GELOGI("DavinciModel::UpdateKnownZeroCopyAddr args host %p, src_args_size %u, args_offset %u", args_host_, - src_args_size, args_offset); - errno_t sec_ret = - memcpy_s(static_cast(args_host_) + args_offset, src_args_size, io_addrs.data(), src_args_size); - if (sec_ret != EOK) { - GELOGE(FAILED, "Call memcpy_s failed, ret: %d", sec_ret); - return FAILED; + GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %d,v addr %p,p addr %p .", i, total_io_addrs_[i], + knonw_output_data_info_.at(total_io_addrs_[i])); + total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]); + } } GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success."); return SUCCESS; @@ -2581,20 +2667,31 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector &inputs, const vec GELOGI("DavinciModel::UpdateKnownNodeArgs in"); GE_CHK_STATUS_RET(CreateKnownZeroCopyMap(inputs, outputs), "DavinciModel::UpdateKnownNodeArgs create map for input/output zero copy."); - for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { - auto &task = task_list_[task_index]; - if (task != nullptr) { - Status ret = task->UpdateArgs(); - if (ret != SUCCESS) { - GELOGE(FAILED, "task %d created by davinci model is nullptr.", task_index); - return FAILED; + 
if (!base_addr_not_changed_) { + total_io_addrs_.clear(); + orig_total_io_addrs_.clear(); + for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { + auto &task = task_list_[task_index]; + if (task != nullptr) { + Status ret = task->UpdateArgs(); + if (ret != SUCCESS) { + GELOGE(FAILED, "task %d created by davinci model is nullptr.", task_index); + return FAILED; + } } } + // cache latest iterator io addr + orig_total_io_addrs_ = total_io_addrs_; + } else { + total_io_addrs_ = orig_total_io_addrs_; } - GELOGI("DavinciModel::UpdateKnownNodeArgs device args %p, size %u, host args %p, size %u", args_, total_args_size_, - args_host_, total_args_size_); - // copy continuous args from host to device - Status rt_ret = rtMemcpy(args_, total_args_size_, args_host_, total_args_size_, RT_MEMCPY_HOST_TO_DEVICE); + GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed."); + + uint32_t total_addr_size = total_io_addrs_.size() * sizeof(uint64_t); + GELOGI("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, src size %u", args_, total_args_size_, + total_addr_size); + + Status rt_ret = rtMemcpy(args_, total_args_size_, total_io_addrs_.data(), total_addr_size, RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;) GELOGI("DavinciModel::UpdateKnownNodeArgs success"); @@ -2602,7 +2699,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector &inputs, const vec } Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) { - GELOGI("InitTaskInfo in,task size %d", model_task_def.task().size()); + GELOGI("InitTaskInfo in, task size %zu", model_task_def.task().size()); task_list_.resize(model_task_def.task_size()); for (int i = 0; i < model_task_def.task_size(); ++i) { // dynamic shape will create task_list_ before @@ -2643,13 +2740,7 @@ Status DavinciModel::MallocKnownArgs() { rtError_t rt_ret = rtMalloc(&args_, 
total_args_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); - return RT_FAILED; - } - // malloc args host memory - rt_ret = rtMallocHost(&args_host_, total_args_size_); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtMallocHost failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } // malloc fixed addr memory, eg: rts op @@ -2658,7 +2749,7 @@ Status DavinciModel::MallocKnownArgs() { rt_ret = rtMalloc(&fixed_addrs_, total_fixed_addr_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } } @@ -2753,49 +2844,22 @@ void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) { /// /// @ingroup ge -/// @brief Save Data address info for ZeroCopy. -/// @param [in] const std::vector &outside_addrs -/// @return None. -/// -void DavinciModel::SetInputOutsideAddr(const std::vector &outside_addrs) { - for (auto addr : outside_addrs) { - if (input_outside_addrs_.find(addr) != input_outside_addrs_.end()) { - continue; - } - - (void)input_outside_addrs_.emplace(std::pair>(addr, {})); - GELOGI("SetInputOutsideAddr success."); - } -} - -/// -/// @ingroup ge -/// @brief Save NetOutput address info for ZeroCopy. -/// @param [in] const std::vector &outside_addrs -/// @return None. -/// -void DavinciModel::SetOutputOutsideAddr(const std::vector &outside_addrs) { - for (auto addr : outside_addrs) { - if (output_outside_addrs_.find(addr) != output_outside_addrs_.end()) { - continue; - } - - DisableZeroCopy(addr); // Data to NetOutput directly. - output_outside_addrs_.emplace(std::pair>(addr, {})); - GELOGI("SetOutputOutsideAddr success."); - } -} - -/// -/// @ingroup ge /// @brief Set copy only for No task feed NetOutput address. /// @return None. 
/// void DavinciModel::SetCopyOnlyOutput() { - for (const auto &addrs : output_outside_addrs_) { - const auto &used_list = addrs.second; - if (used_list.empty()) { // No task feed Output addr, Need copy directly. - copy_only_addrs_.insert(addrs.first); + for (const auto &output_outside_addrs : new_output_outside_addrs_) { + ZeroCopyOffset output_outside = output_outside_addrs.second; + for (uint32_t out_count = 0; out_count < output_outside.GetAddrCount(); ++out_count) { + auto &addrs_mapping_list = output_outside.GetOutsideAddrs(); + std::map> virtual_args_addrs = addrs_mapping_list[out_count]; + for (const auto &virtual_args_addr : virtual_args_addrs) { + const auto &args_addrs = virtual_args_addr.second; + if (args_addrs.empty()) { // No task feed Output addr, Need copy directly. + GELOGI("[ZCPY] just copy %p to netoutput.", virtual_args_addr.first); + copy_only_addrs_.insert(virtual_args_addr.first); + } + } } } } @@ -2807,13 +2871,13 @@ void DavinciModel::SetCopyOnlyOutput() { /// @return None. /// void DavinciModel::DisableZeroCopy(const void *addr) { - if ((input_outside_addrs_.find(addr) == input_outside_addrs_.end()) && - (output_outside_addrs_.find(addr) == output_outside_addrs_.end())) { + if (find(real_virtual_addrs_.begin(), real_virtual_addrs_.end(), addr) == real_virtual_addrs_.end()) { return; } // Data link to RTS Op directly. 
std::lock_guard lock(outside_addrs_mutex_); + GELOGI("[ZCPY] disable zero copy of %p.", addr); copy_only_addrs_.insert(addr); } @@ -2831,29 +2895,28 @@ void DavinciModel::DisableZeroCopy(const void *addr) { void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector &outside_addrs, const void *info, void *args, size_t size, size_t offset) { // Internal call has ensured that op_desc is not nullptr + GELOGI("[ZCPY] SetZeroCopyAddr for %s.", op_desc->GetName().c_str()); size_t nums = outside_addrs.size(); ZeroCopyTask zero_copy_task(op_desc->GetName(), static_cast(args), size); for (size_t i = 0; i < nums; ++i) { std::lock_guard lock(outside_addrs_mutex_); - const uintptr_t addr_val = reinterpret_cast(outside_addrs[i]); - void *args_val = static_cast(args) + offset + i * kAddrLen; - auto it = input_outside_addrs_.find(outside_addrs[i]); - if (it != input_outside_addrs_.end()) { - GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset + i * kAddrLen), "Input args invalid."); - it->second.push_back(args_val); - SetBatchLabelAddr(op_desc, reinterpret_cast(args_val)); - GELOGI("[ZCPY] %s set copy input: %zu, addr: 0x%lx, args: %p, size: %zu, offset: %zu.", - op_desc->GetName().c_str(), i, addr_val, args, size, offset + i * kAddrLen); - continue; + + for (auto &input_outside_addrs : new_input_outside_addrs_) { + ZeroCopyOffset &input_outside = input_outside_addrs.second; + bool ret = input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); + if (ret) { + void *args_val = static_cast(args) + offset + i * kAddrLen; + SetBatchLabelAddr(op_desc, reinterpret_cast(args_val)); + } } - it = output_outside_addrs_.find(outside_addrs[i]); - if (it != output_outside_addrs_.end()) { - GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset + i * kAddrLen), "Output args invalid."); - it->second.push_back(args_val); - SetBatchLabelAddr(op_desc, reinterpret_cast(args_val)); - GELOGI("[ZCPY] %s set copy output: 
%zu, args: %p, addr: 0x%lx.", op_desc->GetName().c_str(), i, args, addr_val); - continue; + for (auto &output_outside_addrs : new_output_outside_addrs_) { + ZeroCopyOffset &output_outside = output_outside_addrs.second; + bool ret = output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); + if (ret) { + void *args_val = static_cast(args) + offset + i * kAddrLen; + SetBatchLabelAddr(op_desc, reinterpret_cast(args_val)); + } } } @@ -2941,12 +3004,13 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 /// @return SUCCESS handle successfully / PARAM_INVALID for failed /// Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic) { - if (UpdateIoTaskArgs(input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { + if (UpdateIoTaskArgs(new_input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { GELOGE(PARAM_INVALID, "[ZCPY] Update input data to model failed."); return PARAM_INVALID; } - if (UpdateIoTaskArgs(output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { + if (UpdateIoTaskArgs(new_output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) != + SUCCESS) { GELOGE(PARAM_INVALID, "[ZCPY] Update output data to model failed."); return PARAM_INVALID; } @@ -2970,7 +3034,7 @@ Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &outp /// @param [in] batch_label: batch label for multi-batch scenes /// @return SUCCESS handle successfully / others handle failed /// -Status DavinciModel::UpdateIoTaskArgs(const map> &data_info, bool is_input, +Status DavinciModel::UpdateIoTaskArgs(const std::map &data_info, bool is_input, const vector &blobs, bool is_dynamic, const string &batch_label) { string input_or_output = "input"; is_input ? 
input_or_output = "input" : input_or_output = "output"; @@ -2986,8 +3050,6 @@ Status DavinciModel::UpdateIoTaskArgs(const map> input_or_output.c_str(), data.first, blobs.size()); return FAILED; } - int64_t size = data.second.first; // size of tensor. - void *addr = data.second.second; // addr of tensor. const DataBuffer &buffer = blobs[data.first]; // index of data. if (buffer.data == nullptr) { @@ -2995,26 +3057,35 @@ Status DavinciModel::UpdateIoTaskArgs(const map> return FAILED; } - GELOGI("[ZCPY] Copy Blobs: %u, addr: %p, size: %ld, data: %p, length: %lu.", data.first, data.second.second, - data.second.first, buffer.data, buffer.length); - if (!CheckInputAndModelSize(buffer.length, size, is_dynamic)) { + if (!CheckInputAndModelSize(buffer.length, data.second.GetDataSize(), is_dynamic)) { GELOGE(FAILED, "Check input size and model size failed"); return FAILED; } - // For input data, just copy for rts task. - if (is_input && copy_only_addrs_.count(addr) > 0) { - if (rtMemcpy(addr, size, buffer.data, buffer.length, RT_MEMCPY_DEVICE_TO_DEVICE) != RT_ERROR_NONE) { - GELOGE(FAILED, "Non-zero copy data node copy failed"); - return FAILED; + for (size_t count = 0; count < data.second.GetDataCount(); ++count) { + int64_t size = data.second.GetDataInfo().at(count).first; + void *addr = data.second.GetDataInfo().at(count).second; + void *buffer_addr = + reinterpret_cast(reinterpret_cast(buffer.data) + data.second.GetRelativeOffset().at(count)); + GELOGI("[ZCPY] Copy blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p", data.first, addr, size, + buffer_addr); + // For input data, just copy for rts task. 
+ if (copy_only_addrs_.count(addr) > 0 && data.second.GetRelativeOffset().at(count) == 0) { + if (is_input) { + GELOGI("[IMAS] Find addr %p need direct copy from user malloc input %p", addr, buffer_addr); + if (rtMemcpy(addr, size, buffer_addr, size, RT_MEMCPY_DEVICE_TO_DEVICE) != RT_ERROR_NONE) { + GELOGE(FAILED, "Non-zero copy data node copy failed"); + return FAILED; + } + } + GELOGI("No need to exeucte zero copy task because this addr %p need direct copy.", addr); + continue; } - continue; - } - - for (ZeroCopyTask &task : zero_copy_tasks_) { - uintptr_t addr_val = reinterpret_cast(addr); - if (task.UpdateTaskParam(addr_val, buffer, zero_copy_batch_label_addrs_, batch_label) != SUCCESS) { - return FAILED; + for (ZeroCopyTask &task : zero_copy_tasks_) { + uintptr_t addr_val = reinterpret_cast(addr); + if (task.UpdateTaskParam(addr_val, buffer_addr, zero_copy_batch_label_addrs_, batch_label) != SUCCESS) { + return FAILED; + } } } } @@ -3263,7 +3334,10 @@ Status DavinciModel::InitStreamSwitchN(const OpDescPtr &op_desc) { GELOGI("StreamSwitchNOp node:%s, active_stream_id=%u.", op_desc->GetName().c_str(), active_stream_list[j]); } + (void)AttrUtils::GetInt(op_desc, ATTR_DYNAMIC_TYPE, dynamic_type_); + batch_info_.clear(); + combined_batch_info_.clear(); uint32_t batch_num = 0; if (!AttrUtils::GetInt(op_desc, ATTR_NAME_BATCH_NUM, batch_num)) { GELOGE(FAILED, "Failed to get attr ATTR_NAME_BATCH_NUM, StreamSwitchN: %s.", op_desc->GetName().c_str()); @@ -3279,6 +3353,11 @@ Status DavinciModel::InitStreamSwitchN(const OpDescPtr &op_desc) { return FAILED; } batch_info_.emplace_back(batch_shape); + batch_shape.clear(); + const string attr_combined_batch = ATTR_NAME_COMBINED_BATCH + "_" + std::to_string(i); + if (AttrUtils::GetListInt(op_desc, attr_combined_batch, batch_shape)) { + combined_batch_info_.emplace_back(batch_shape); + } } return SUCCESS; @@ -3352,7 +3431,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa 
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_START)); Status ret = CopyModelData(input_data, output_data, input_data.is_dynamic_batch); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return INTERNAL_ERROR, "Copy input data to model failed."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u", + model_id_); GELOGI("current_data.index=%u", input_data.index); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_END)); @@ -3361,7 +3441,7 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa GELOGD("rtModelExecute do"); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_INFER_START)); rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0); - GE_CHK_RT_EXEC(rt_ret, return INTERNAL_ERROR); + GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_INFER_END)); GELOGI("rtModelExecute end"); } @@ -3369,7 +3449,7 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa if (!is_async_mode_) { GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_START)); ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return INTERNAL_ERROR, "Copy Output data to user failed."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy Output data to user failed."); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_END)); } @@ -3401,12 +3481,13 @@ Status DavinciModel::AddHeadStream() { for (auto s : active_stream_list_) { std::shared_ptr active_entry = MakeShared(rt_head_stream_); if (active_entry == nullptr) { - GELOGE(FAILED, "Make CpuTaskActiveEntry task failed."); - return FAILED; + 
GELOGE(MEMALLOC_FAILED, "Make CpuTaskActiveEntry task failed."); + return MEMALLOC_FAILED; } - if (active_entry->Init(s) != SUCCESS) { - return FAILED; + Status status = active_entry->Init(s); + if (status != SUCCESS) { + return status; } cpu_task_list_.emplace_back(active_entry); @@ -3496,7 +3577,7 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) rtError_t rt_ret = rtCtxGetCurrent(&ctx); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Failed to get current context, error_code is: 0x%X.", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } std::vector variable_node_list; @@ -3693,7 +3774,8 @@ Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vectorGetInputDescPtr(kDataIndex)), data_input_size); GELOGD( - "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %u, tensor_size: %zu, format: %s, " + "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %u, tensor_size: %zu, format: " + "%s, " "data_type: %s, shape: %s .", index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), diff --git a/src/ge/graph/load/new_model_manager/davinci_model.h b/src/ge/graph/load/new_model_manager/davinci_model.h index 0f0b1e5c..d871fbf9 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.h +++ b/src/ge/graph/load/new_model_manager/davinci_model.h @@ -28,14 +28,15 @@ #include "common/helper/model_helper.h" #include "common/helper/om_file_helper.h" #include "common/opskernel/ge_task_info.h" -#include "common/types.h" #include "common/properties_manager.h" +#include "common/types.h" #include "framework/common/util.h" #include "graph/debug/ge_attr_define.h" +#include "graph/load/new_model_manager/aipp_utils.h" #include "graph/load/new_model_manager/data_dumper.h" #include "graph/load/new_model_manager/data_inputer.h" #include 
"graph/load/new_model_manager/model_utils.h" -#include "graph/load/new_model_manager/aipp_utils.h" +#include "graph/load/new_model_manager/zero_copy_offset.h" #include "graph/load/new_model_manager/zero_copy_task.h" #include "graph/model.h" #include "graph/node.h" @@ -285,11 +286,20 @@ class DavinciModel { /// @ingroup ge /// @brief Get dynamic batch_info /// @param [out] batch_info + /// @param [out] dynamic_type /// @return execute result /// - Status GetDynamicBatchInfo(std::vector> &batch_info) const; + Status GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) const; + + /// + /// @ingroup ge + /// @brief Get combined dynamic dims info + /// @param [out] batch_info + /// @return None + /// + void GetCombinedDynamicDims(std::vector> &batch_info) const; - void GetCurShape(std::vector &batch_info); + void GetCurShape(std::vector &batch_info, int32_t &dynamic_type); void GetModelAttr(std::vector &dynamic_output_shape_info); @@ -416,7 +426,7 @@ class DavinciModel { void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector &outside_addrs, const void *info, void *args, size_t size, size_t offset); - void SetDynamicSize(const std::vector &batch_num); + void SetDynamicSize(const std::vector &batch_num, int32_t dynamic_type); bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; } @@ -456,6 +466,9 @@ class DavinciModel { void *cur_args = static_cast(args_) + offset; return cur_args; } + void SetTotalIOAddrs(vector &io_addrs) { + total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end()); + } void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size); int64_t GetFixedAddrsSize(string tensor_name); void *GetCurrentFixedAddr(int64_t offset) const { @@ -474,7 +487,8 @@ class DavinciModel { Status MallocKnownArgs(); Status UpdateKnownNodeArgs(const vector &inputs, const vector &outputs); Status CreateKnownZeroCopyMap(const vector &inputs, const vector &outputs); - Status UpdateKnownZeroCopyAddr(vector 
&io_addrs, uint32_t args_offset); + Status UpdateKnownZeroCopyAddr(); + void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); Status GetAllAippInputOutputDims(uint32_t index, std::vector &input_dims, @@ -513,22 +527,6 @@ class DavinciModel { /// /// @ingroup ge - /// @brief Save Data address info for ZeroCopy. - /// @param [in] const std::vector &outside_addrs - /// @return None. - /// - void SetInputOutsideAddr(const std::vector &outside_addrs); - - /// - /// @ingroup ge - /// @brief Save NetOutput address info for ZeroCopy. - /// @param [in] const std::vector &outside_addrs - /// @return None. - /// - void SetOutputOutsideAddr(const std::vector &outside_addrs); - - /// - /// @ingroup ge /// @brief Copy Check input size and model op size. /// @param [in] const int64_t &input_size: input size. /// @param [in] const int64_t &op_size: model op size. @@ -564,7 +562,7 @@ class DavinciModel { /// @param [in] batch_label: batch label for multi-batch scenes /// @return SUCCESS handle successfully / others handle failed /// - Status UpdateIoTaskArgs(const map> &data_info, bool is_input, + Status UpdateIoTaskArgs(const std::map &data_info, bool is_input, const vector &blobs, bool is_dynamic, const string &batch_label); Status CopyInputData(const InputData &input_data, bool device_data = false); @@ -706,8 +704,7 @@ class DavinciModel { /// Status BindInputQueue(); - Status CpuTaskModelZeroCopy(std::vector &mbuf_list, - std::map> &outside_addrs); + Status CpuTaskModelZeroCopy(std::vector &mbuf_list, std::map &outside_addrs); /// /// @ingroup ge @@ -816,8 +813,12 @@ class DavinciModel { vector variable_op_list_; - std::map> input_data_info_; // Virtual address from Data output. - std::map> output_data_info_; // Virtual address from NetOutput input. 
+ std::map new_input_data_info_; + std::map new_output_data_info_; + std::map new_input_outside_addrs_; + std::map new_output_outside_addrs_; + + std::vector real_virtual_addrs_; // output op: save cce op actual needed memory size vector output_memory_size_list_; @@ -849,9 +850,7 @@ class DavinciModel { std::mutex outside_addrs_mutex_; std::vector zero_copy_tasks_; // Task used Data or NetOutput addr. std::set copy_only_addrs_; // Address need copy to original place. - // {node_addr, {addr_in_task_args}} - std::map> input_outside_addrs_; // Key is virtual address from Data. - std::map> output_outside_addrs_; // Key is virtual address from NetOutput. + // {op_id, batch_label} std::map zero_copy_op_id_batch_label_; // {batch_label, addrs} @@ -920,8 +919,13 @@ class DavinciModel { int64_t total_fixed_addr_size_ = 0; std::map knonw_input_data_info_; std::map knonw_output_data_info_; + vector total_io_addrs_; + vector orig_total_io_addrs_; + bool base_addr_not_changed_ = false; vector> batch_info_; + std::vector> combined_batch_info_; + int32_t dynamic_type_ = 0; vector batch_size_; // key: input tensor name, generally rts op; diff --git a/src/ge/graph/load/new_model_manager/model_manager.cc b/src/ge/graph/load/new_model_manager/model_manager.cc index 04c836dd..bd82417f 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.cc +++ b/src/ge/graph/load/new_model_manager/model_manager.cc @@ -70,11 +70,11 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u auto kernel_size = sizeof(uint64_t) * (v_aicpu_kernel.size()); rtError_t rt_ret = rtMalloc(&aicpu_kernel_addr, kernel_size, RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); - return RT_FAILED;) + return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy(aicpu_kernel_addr, kernel_size, v_aicpu_kernel.data(), kernel_size, RT_MEMCPY_HOST_TO_DEVICE); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to 
input_output_addr_ error: 0x%X", rt_ret); - GE_CHK_RT(rtFree(aicpu_kernel_addr)); return FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); + GE_CHK_RT(rtFree(aicpu_kernel_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) uint64_t kernel_id_addr = static_cast(reinterpret_cast(aicpu_kernel_addr)); param_base.fwkKernelBase.fwk_kernel.kernelID = kernel_id_addr; // In the scene of loading once and running many times, the kernel needs to be destroyed many times, @@ -84,64 +84,64 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u rtError_t rt_ret = rtMalloc(&(devicebase), sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "malloc device memory failed."); + GELOGE(RT_FAILED, "malloc device memory failed. ret: 0x%X", rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); - return FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), ¶m_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "memory copy to device failed."); + GELOGE(RT_FAILED, "memory copy to device failed. ret: 0x%X", rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); GE_CHK_RT(rtFree(devicebase)); - return FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rtStream_t stream = nullptr; rt_ret = rtStreamCreate(&stream, 0); if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "create stream failed."); + GELOGE(RT_FAILED, "create stream failed. 
ret: 0x%X", rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); GE_CHK_RT(rtFree(devicebase)); - return FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtKernelLaunchEx(devicebase, sizeof(STR_FWK_OP_KERNEL), 0, stream); if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "rtKernelLaunchEx failed."); + GELOGE(RT_FAILED, "rtKernelLaunchEx failed. ret: 0x%X", rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); GE_CHK_RT(rtFree(devicebase)); GE_CHK_RT(rtStreamDestroy(stream)); - return FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtStreamSynchronize(stream); if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "rtStreamSynchronize failed."); + GELOGE(RT_FAILED, "rtStreamSynchronize failed. ret: 0x%X", rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); GE_CHK_RT(rtFree(devicebase)); GE_CHK_RT(rtStreamDestroy(stream)); - return FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } if (aicpu_kernel_addr != nullptr) { rt_ret = rtFree(aicpu_kernel_addr); if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "free memory failed."); + GELOGE(RT_FAILED, "free memory failed. ret: 0x%X", rt_ret); GE_CHK_RT(rtFree(devicebase)); GE_CHK_RT(rtStreamDestroy(stream)); - return FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } } rt_ret = rtFree(devicebase); if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "free memory failed."); + GELOGE(RT_FAILED, "free memory failed. ret: 0x%X", rt_ret); GE_CHK_RT(rtStreamDestroy(stream)); - return FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtStreamDestroy(stream); if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "rtStreamDestroy failed."); - return FAILED; + GELOGE(RT_FAILED, "rtStreamDestroy failed. 
ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } return SUCCESS; } @@ -168,8 +168,8 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { std::lock_guard lock(map_mutex_); auto it = model_map_.find(model_id); if (it == model_map_.end()) { - GELOGE(PARAM_INVALID, "model id %u does not exists.", model_id); - return PARAM_INVALID; + GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); + return GE_EXEC_MODEL_ID_INVALID; } uint64_t session_id = it->second->GetSessionId(); GELOGI("Destroy aicpu session for infer, session id is %u.", session_id); @@ -223,10 +223,11 @@ Status ModelManager::SetDevice(int32_t deviceId) const { return SUCCESS; } -ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector &batch_num) { +ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector &batch_num, + int32_t dynamic_type) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHECK_NOTNULL(davinci_model); - davinci_model->SetDynamicSize(batch_num); + davinci_model->SetDynamicSize(batch_num, dynamic_type); return SUCCESS; } @@ -332,8 +333,8 @@ Status ModelManager::DeleteModel(uint32_t id) { } else if (hybrid_model_it != hybrid_model_map_.end()) { (void)hybrid_model_map_.erase(hybrid_model_it); } else { - GELOGE(PARAM_INVALID, "model id %u does not exists.", id); - return PARAM_INVALID; + GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id); + return GE_EXEC_MODEL_ID_INVALID; } return SUCCESS; @@ -386,7 +387,7 @@ Status ModelManager::DataInput(const InputData &input_data, OutputData &output_d std::shared_ptr model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid Model ID %u in InputData! ", model_id); + GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid model id %u in InputData! 
", model_id); GE_IF_BOOL_EXEC(model->GetDataInputTid() == 0, model->SetDataInputTid(mmGetTid())); @@ -422,7 +423,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector(inputs[i].length); + data.length = inputs[i].length; input_data.blobs.push_back(data); } @@ -442,7 +443,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vectorGetDataInputer(); GE_CHECK_NOTNULL(inputer); @@ -472,7 +473,7 @@ Status ModelManager::Start(uint32_t model_id) { std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to start! ", model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u to start! ", model_id); Status status = davinci_model->ModelRunStart(); if (status == SUCCESS) { @@ -499,7 +500,7 @@ Status ModelManager::Stop(uint32_t model_id) { } std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to stop!", model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u to stop!", model_id); Status status = davinci_model->ModelRunStop(); if (status == SUCCESS) { @@ -661,7 +662,7 @@ Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_siz } std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetMaxUsedMemory Failed, Invalid Model ID %u !", + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetMaxUsedMemory Failed, Invalid model id %u!", model_id); max_size = davinci_model->TotalMemSize(); @@ -671,8 +672,8 @@ Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_siz Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector &input_desc, vector &output_desc) { std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, 
PARAM_INVALID, - "GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", + model_id); return davinci_model->GetInputOutputDescInfo(input_desc, output_desc); } @@ -682,8 +683,8 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector &inputFormats, std::vector &outputFormats, bool new_model_desc) { std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, - "GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, + "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); davinci_model->SetModelDescVersion(new_model_desc); @@ -697,18 +698,35 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector> &batch_info) { +Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector> &batch_info, + int32_t &dynamic_type) { + std::shared_ptr davinci_model = GetModel(model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, + "GetDynamicBatchInfo failed, Invalid model id %u!", model_id); + + return davinci_model->GetDynamicBatchInfo(batch_info, dynamic_type); +} + +/// +/// @ingroup ge +/// @brief Get combined dynamic dims info +/// @param [in] model_id +/// @param [out] batch_info +/// @return execute result +/// +Status ModelManager::GetCombinedDynamicDims(const uint32_t model_id, vector> &batch_info) { std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetDynamicBatchInfo Failed, Invalid Model ID %u !", + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetCombinedDynamicDims Failed, Invalid Model ID %u!", model_id); - return davinci_model->GetDynamicBatchInfo(batch_info); + 
davinci_model->GetCombinedDynamicDims(batch_info); + return SUCCESS; } -Status ModelManager::GetCurShape(const uint32_t model_id, std::vector &batch_info) { +Status ModelManager::GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHECK_NOTNULL(davinci_model); - davinci_model->GetCurShape(batch_info); + davinci_model->GetCurShape(batch_info, dynamic_type); return SUCCESS; } @@ -724,8 +742,8 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, std::vector &inputFormats, std::vector &outputFormats) { std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, - "GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", + model_id); return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); } @@ -767,8 +785,8 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr listener, void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { - GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, PARAM_INVALID, - "input key file path is not valid, %s", strerror(errno)); + GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, GE_EXEC_MODEL_KEY_PATH_INVALID, + "input key file path %s is invalid, %s", model.key.c_str(), strerror(errno)); GenModelId(&model_id); shared_ptr davinci_model = nullptr; @@ -786,11 +804,11 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model try { davinci_model = std::make_shared(model.priority, listener); } catch (std::bad_alloc &) { - GELOGE(FAILED, "Make shared failed"); - return FAILED; + 
GELOGE(MEMALLOC_FAILED, "Make shared failed"); + return MEMALLOC_FAILED; } catch (...) { - GELOGE(FAILED, "Make shared failed since other exception raise"); - return FAILED; + GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise"); + return INTERNAL_ERROR; } ret = davinci_model->Assign(ge_model); if (ret != SUCCESS) { @@ -803,7 +821,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model rtError_t rt_ret = rtGetDevice(&device_id); if (rt_ret != RT_ERROR_NONE || device_id < 0) { GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); - return FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } davinci_model->SetDeviceId(device_id); davinci_model->SetOmName(model.om_name); @@ -851,8 +869,9 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector &input_queue_ids, const std::vector &output_queue_ids) { - GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || access(model_data.key.c_str(), F_OK) == 0, PARAM_INVALID, - "input key file path is not valid, %s", strerror(errno)); + GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || access(model_data.key.c_str(), F_OK) == 0, + GE_EXEC_MODEL_KEY_PATH_INVALID, "input key file path %s is not valid, %s", + model_data.key.c_str(), strerror(errno)); ModelHelper model_helper; Status ret = model_helper.LoadModel(model_data); @@ -863,8 +882,8 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d shared_ptr davinci_model = MakeShared(model_data.priority, nullptr); if (davinci_model == nullptr) { - GELOGE(FAILED, "create model failed."); - return FAILED; + GELOGE(MEMALLOC_FAILED, "create model failed."); + return MEMALLOC_FAILED; } ret = davinci_model->Assign(model_helper.GetGeModel()); @@ -916,7 +935,7 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d Status 
ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, OutputData &output_data) { std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to start! ", model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id); if (davinci_model->NeedDestroyAicpuKernel()) { GELOGI("Start to destroy specified aicpu kernel."); @@ -973,29 +992,30 @@ Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &me auto partition_table = reinterpret_cast(model_data); if (partition_table->num == 1) { - GELOGE(FAILED, "om model is error,please use executable om model"); - return FAILED; + GELOGE(GE_EXEC_MODEL_PARTITION_NUM_INVALID, "om model is error,please use executable om model"); + return GE_EXEC_MODEL_PARTITION_NUM_INVALID; } ModelPartition task_partition; if (om_file_helper.GetModelPartition(ModelPartitionType::TASK_INFO, task_partition) != SUCCESS) { - GELOGE(FAILED, "get task model partition failed."); - return FAILED; + GELOGE(GE_EXEC_LOAD_TASK_PARTITION_FAILED, "get task model partition failed."); + return GE_EXEC_LOAD_TASK_PARTITION_FAILED; } std::shared_ptr model_task_def = MakeShared(); if (model_task_def == nullptr) { - return FAILED; + return MEMALLOC_FAILED; } if (task_partition.size != 0) { if (!ReadProtoFromArray(task_partition.data, static_cast(task_partition.size), model_task_def.get())) { - GELOGE(FAILED, "ReadProtoFromArray failed."); - return FAILED; + GELOGE(GE_EXEC_LOAD_TASK_PARTITION_FAILED, "ReadProtoFromArray failed."); + return GE_EXEC_LOAD_TASK_PARTITION_FAILED; } } ModelPartition partition_weight; ret = om_file_helper.GetModelPartition(ModelPartitionType::WEIGHTS_DATA, partition_weight); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Get weight partition failed. 
ret = %u", ret); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED, + "Get weight partition failed. ret = %u", ret); mem_size = model_task_def->memory_size(); weight_size = partition_weight.size; diff --git a/src/ge/graph/load/new_model_manager/model_manager.h b/src/ge/graph/load/new_model_manager/model_manager.h index 2ba23d7c..153d324d 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.h +++ b/src/ge/graph/load/new_model_manager/model_manager.h @@ -187,9 +187,19 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// @brief Get dynamic batch_info /// @param [in] model_id /// @param [out] batch_info + /// @param [out] dynamic_type /// @return execute result /// - ge::Status GetDynamicBatchInfo(const uint32_t model_id, std::vector> &batch_info); + ge::Status GetDynamicBatchInfo(const uint32_t model_id, std::vector> &batch_info, + int32_t &dynamic_type); + /// + /// @ingroup ge + /// @brief Get combined dynamic dims info + /// @param [in] model_id + /// @param [out] batch_info + /// @return execute result + /// + ge::Status GetCombinedDynamicDims(const uint32_t model_id, std::vector> &batch_info); /// /// @ingroup ge @@ -215,13 +225,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { std::vector &inputFormats, std::vector &outputFormats); - ge::Status GetCurShape(const uint32_t model_id, std::vector &batch_info); + ge::Status GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type); ge::Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); ge::Status SetDevice(int32_t deviceId) const; - ge::Status SetDynamicSize(uint32_t model_id, const std::vector &batch_num); + ge::Status SetDynamicSize(uint32_t model_id, const std::vector &batch_num, int32_t dynamic_type); /// /// @ingroup domi_ome diff --git a/src/ge/graph/load/new_model_manager/model_utils.cc b/src/ge/graph/load/new_model_manager/model_utils.cc index 
bd684b9d..2b341edc 100644 --- a/src/ge/graph/load/new_model_manager/model_utils.cc +++ b/src/ge/graph/load/new_model_manager/model_utils.cc @@ -56,6 +56,7 @@ vector ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { if (tensor_size) { v_input_size.push_back(tensor_size); } + GELOGI("[IMAS]GetInputSize op: %s, index: %lu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); continue; } @@ -64,6 +65,8 @@ vector ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); continue); + GELOGI("[IMAS]GetInputSize op: %s, index: %lu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); + v_input_size.push_back(tensor_size); } diff --git a/src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc index 920b52e6..39f0591d 100644 --- a/src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc @@ -34,7 +34,7 @@ Status EndGraphTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList()); if (ret != SUCCESS) { GELOGE(ret, "SetStream fail, stream_id:%u", task_def.stream_id()); - return FAILED; + return ret; } model_ = davinci_model->GetRtModelHandle(); @@ -53,14 +53,14 @@ Status EndGraphTaskInfo::Distribute() { rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rtEndGraphEx failed, ret: 0x%x", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } } else { GELOGI("Start to call rtEndGraph"); rtError_t rt_ret = rtEndGraph(model_, stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rtEndGraph failed, ret: 0x%x", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } } @@ -69,7 +69,7 @@ Status EndGraphTaskInfo::Distribute() { 
rtError_t rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } task_id_ = task_id; stream_id_ = stream_id; diff --git a/src/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc index edfd8d17..f742118c 100644 --- a/src/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc @@ -49,7 +49,7 @@ Status EventRecordTaskInfo::Distribute() { rtError_t rt_ret = rtEventRecord(event_, stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } return SUCCESS; diff --git a/src/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc index a8db158d..e8f96b35 100644 --- a/src/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc @@ -51,13 +51,13 @@ Status EventWaitTaskInfo::Distribute() { rtError_t rt_ret = rtStreamWaitEvent(stream_, event_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtEventReset(event_, stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } return SUCCESS; diff --git a/src/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc index f3fa7959..9b1ea04a 100644 --- 
a/src/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc @@ -40,7 +40,7 @@ Status FusionStartTaskInfo::Distribute() { rtError_t rt_ret = rtKernelFusionStart(stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } GELOGI("FusionStartTaskInfo Distribute Success."); diff --git a/src/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc index 128fb325..7acbb5b3 100644 --- a/src/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc @@ -40,7 +40,7 @@ Status FusionStopTaskInfo::Distribute() { rtError_t rt_ret = rtKernelFusionEnd(stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } GELOGI("FusionStopTaskInfo Distribute Success."); diff --git a/src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc index 2a79997f..cb8cfed6 100644 --- a/src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc @@ -73,24 +73,24 @@ Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_m // Only in Horovod scenario should get the inputName and GeShape ret = HcomOmeUtil::GetHorovodInputs(op_desc_, kernel_hccl_infos_); if (ret != SUCCESS) { - GELOGE(FAILED, "davinci_model: GetHorovodInputs fail! domi error: %u", ret); - return FAILED; + GELOGE(ret, "davinci_model: GetHorovodInputs fail! 
domi error: %u", ret); + return ret; } Status dmrt = HcomOmeUtil::GetHcclDataType(op_desc_, kernel_hccl_infos_); if (dmrt != SUCCESS) { - GELOGE(FAILED, "davinci_model: GetHcomDataType fail! domi error: %u", dmrt); - return FAILED; + GELOGE(dmrt, "davinci_model: GetHcomDataType fail! domi error: %u", dmrt); + return dmrt; } dmrt = HcomOmeUtil::GetHcclCount(op_desc_, kernel_hccl_infos_); if (dmrt != SUCCESS) { - GELOGE(FAILED, "davinci_model: GetHcomCount fail! domi error: %u", dmrt); - return FAILED; + GELOGE(dmrt, "davinci_model: GetHcomCount fail! domi error: %u", dmrt); + return dmrt; } // Only HCOMBROADCAST and HVDCALLBACKBROADCAST need to get the rootId dmrt = HcomOmeUtil::GetAllRootId(op_desc_, kernel_hccl_infos_); if (dmrt != SUCCESS) { - GELOGE(FAILED, "davinci_model: Get rootId fail! domi error: %u", dmrt); - return FAILED; + GELOGE(dmrt, "davinci_model: Get rootId fail! domi error: %u", dmrt); + return dmrt; } // GE's new process: hccl declares the number of streams required, creates a stream by GE, and sends it to hccl @@ -138,8 +138,8 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM uint32_t max_task_count; ret = rtGetMaxStreamAndTask(RT_NORMAL_STREAM, &max_stream_count, &max_task_count); if (ret != RT_ERROR_NONE) { - GELOGE(FAILED, "Get max stream and task count by rts failed."); - return FAILED; + GELOGE(RT_FAILED, "Get max stream and task count by rts failed."); + return RT_ERROR_TO_GE_STATUS(ret); } max_node_of_hccl_stream_ = max_task_count / kMaxTaskOfStream; } @@ -153,8 +153,8 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM ReuseStream(created_stream_num, davinci_model); ret = CreateStream(hccl_stream_num - created_stream_num, davinci_model); if (ret != SUCCESS) { - GELOGE(FAILED, "Create hccl stream failed."); - return FAILED; + GELOGE(RT_FAILED, "Create hccl stream failed."); + return RT_ERROR_TO_GE_STATUS(ret); } } GELOGI("Initialize hccl slave stream success, hcclStreamNum 
=%ld", hccl_stream_num); @@ -179,14 +179,14 @@ Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_mode rtStreamCreateWithFlags(&stream, davinci_model->Priority(), RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } // Create slave stream, inactive by default, activated by hccl rt_ret = rtModelBindStream(davinci_model->GetRtModelHandle(), stream, RT_MODEL_WAIT_ACTIVE_STREAM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); (void)rtStreamDestroy(stream); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } GELOGD("hccl_stream addr is=%p", stream); int64_t remain_cap = max_node_of_hccl_stream_ - 1; @@ -250,8 +250,7 @@ Status HcclTaskInfo::UpdateArgs() { io_addrs.insert(io_addrs.end(), output_data_addrs_.begin(), output_data_addrs_.end()); io_addrs.insert(io_addrs.end(), workspace_data_addrs_.begin(), workspace_data_addrs_.end()); - GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), - "update known node %s zero copy addr failed.", op_desc_->GetName().c_str()); + davinci_model_->SetTotalIOAddrs(io_addrs); GELOGI("HcclTaskInfo::UpdateArgs success."); return SUCCESS; diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc index a241e129..4f72ec36 100644 --- a/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc @@ -72,11 +72,11 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); - return FAILED;) + return 
RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); - return FAILED;) + return RT_ERROR_TO_GE_STATUS(rt_ret);) } GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc_->GetName().c_str(), @@ -113,7 +113,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin static_cast(reinterpret_cast(input_output_addr)); void *workspace_base_addr = nullptr; rtError_t rt_ret = rtMalloc(&workspace_base_addr, kernel_ex_def.task_info_size(), RT_MEMORY_HBM); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error, ret: Ox%X", rt_ret); return FAILED;); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: Ox%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);); rt_ret = rtMemcpy(workspace_base_addr, kernel_ex_def.task_info_size(), kernel_ex_def.task_info().data(), kernel_ex_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE); fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = @@ -123,20 +124,23 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast(ext_info_addr_); rt_ret = rtMalloc(&kernel_buf_, kernel_buf_size_, RT_MEMORY_HBM); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy(kernel_buf_, kernel_buf_size_, static_cast(&fwk_op_kernel), kernel_buf_size_, RT_MEMCPY_HOST_TO_DEVICE); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, 
"rtMemcpy error, ret: Ox%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);) GELOGI("KernelExTaskInfo knonw node Init Success."); return SUCCESS; } // 3. Set workspaceaddr, inputOutputDataAddr - if (CopyTaskInfo(kernel_ex_def, rts_param, op_desc) != SUCCESS) { - GELOGE(FAILED, "copy task info to workspace failed."); - return FAILED; + Status ge_ret = CopyTaskInfo(kernel_ex_def, rts_param, op_desc); + if (ge_ret != SUCCESS) { + GELOGE(ge_ret, "copy task info to workspace failed."); + return ge_ret; } const vector workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc); @@ -155,11 +159,12 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin auto addrs_size = sizeof(uint64_t) * (io_addrs.size()); if (addrs_size > 0) { rtError_t rt_ret = rtMalloc(&input_output_addr_, addrs_size, RT_MEMORY_HBM); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); return RT_FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy(input_output_addr_, addrs_size, io_addrs.data(), addrs_size, RT_MEMCPY_HOST_TO_DEVICE); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); - return FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);) if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), op_desc->GetName())) { @@ -177,11 +182,13 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin // 4. 
Return result rtError_t rt_ret = rtMalloc(&kernel_buf_, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy(kernel_buf_, sizeof(STR_FWK_OP_KERNEL), static_cast(&fwk_op_kernel), sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);) davinci_model_->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), input_output_addr_, addrs_size, 0); @@ -254,9 +261,7 @@ Status KernelExTaskInfo::UpdateArgs() { } } } - GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), - "update known node %s zero copy addr failed.", op_desc_->GetName().c_str()); - + davinci_model_->SetTotalIOAddrs(io_addrs); GELOGI("KernelExTaskInfo::UpdateArgs success."); return SUCCESS; } @@ -286,8 +291,8 @@ Status KernelExTaskInfo::CopyTaskInfo(const domi::KernelExDef &kernel_def, const rtError_t rt_ret = rtMemcpy(workspace_data_addrs[0], kernel_def.task_info_size(), kernel_def.task_info().data(), kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - GELOGE(FAILED, "rtMemcpy error: 0x%X", rt_ret); - return FAILED; + GELOGE(RT_FAILED, "rtMemcpy error: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } return SUCCESS; @@ -298,7 +303,7 @@ Status KernelExTaskInfo::Distribute() { rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } if (davinci_model_ == nullptr) { @@ 
-311,7 +316,7 @@ Status KernelExTaskInfo::Distribute() { rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } task_id_ = task_id; stream_id_ = stream_id; @@ -326,7 +331,7 @@ Status KernelExTaskInfo::Release() { rtError_t rt_ret = rtFree(kernel_buf_); if (rt_ret != RT_ERROR_NONE) { GELOGW("rtFree error, ret: 0x%X", rt_ret); - ret = FAILED; + ret = RT_ERROR_TO_GE_STATUS(rt_ret); } else { kernel_buf_ = nullptr; } @@ -335,7 +340,7 @@ Status KernelExTaskInfo::Release() { rtError_t rt_ret = rtFree(input_output_addr_); if (rt_ret != RT_ERROR_NONE) { GELOGW("rtFree error, ret: 0x%X", rt_ret); - ret = FAILED; + ret = RT_ERROR_TO_GE_STATUS(rt_ret); } else { input_output_addr_ = nullptr; } @@ -344,7 +349,7 @@ Status KernelExTaskInfo::Release() { rtError_t rt_ret = rtFree(ext_info_addr_); if (rt_ret != RT_ERROR_NONE) { GELOGW("rtFree ext_info_addr[%p] error, ret: 0x%X", ext_info_addr_, rt_ret); - ret = FAILED; + ret = RT_ERROR_TO_GE_STATUS(rt_ret); } else { ext_info_addr_ = nullptr; } diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 12fe0206..da6d05ca 100644 --- a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -99,13 +99,13 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci rt_ret = rtGetFunctionByName(const_cast(kernel_def.stub_func().c_str()), &stub_func_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. 
stub_func: %s", kernel_def.stub_func().c_str()); - return RT_FAILED;); + return RT_ERROR_TO_GE_STATUS(rt_ret);); } else if (kernel_type_ != cce::ccKernelType::AI_CPU) { rtError_t rt_ret; rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key); - return RT_FAILED;); + return RT_ERROR_TO_GE_STATUS(rt_ret);); } if (context.origin_op_index_size() > CC_FUSION_OP_MAX) { @@ -232,7 +232,7 @@ Status KernelTaskInfo::SuperKernelLaunch() { skt_info_.last_dump_flag); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "SuperKernelLaunch: Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } call_save_dump_ = true; GE_CHK_STATUS_RET(SKTFinalize(), "Skt finalize failed"); @@ -241,21 +241,24 @@ Status KernelTaskInfo::SuperKernelLaunch() { // Create super kernel factory skt::SuperKernelFactory *factory = &skt::SuperKernelFactory::GetInstance(); // Init super kernel factory - if (factory->Init() != SUCCESS) { - GELOGE(RT_FAILED, "SuperKernelLaunch: SuperKernelFactory init failed"); - return RT_FAILED; + Status ge_ret = factory->Init(); + if (ge_ret != SUCCESS) { + GELOGE(ge_ret, "SuperKernelLaunch: SuperKernelFactory init failed"); + return ge_ret; } // Call the fuse API std::unique_ptr superKernel = nullptr; - if (factory->FuseKernels(skt_kernel_list, skt_arg_list, skt_info_.last_block_dim, superKernel) != SUCCESS) { - GELOGE(RT_FAILED, "SuperKernelLaunch: fuse call failed"); - return RT_FAILED; + ge_ret = factory->FuseKernels(skt_kernel_list, skt_arg_list, skt_info_.last_block_dim, superKernel); + if (ge_ret != SUCCESS) { + GELOGE(ge_ret, "SuperKernelLaunch: fuse call failed"); + return ge_ret; } // Launch a super kernel skt_dump_flag_ = GetDumpFlag(); - if (superKernel->Launch(skt_info_.last_stream, skt_dump_flag_) != SUCCESS) { - GELOGE(RT_FAILED, "SuperKernelLaunch: launch failed"); - return RT_FAILED; + 
ge_ret = superKernel->Launch(skt_info_.last_stream, skt_dump_flag_); + if (ge_ret != SUCCESS) { + GELOGE(ge_ret, "SuperKernelLaunch: launch failed"); + return ge_ret; } GELOGI("SuperKernelLaunch: success[skt_kernel_list size[%zu] skt_arg_list[%zu]]", skt_kernel_list.size(), skt_arg_list.size()); @@ -276,9 +279,9 @@ Status KernelTaskInfo::SaveSuperKernelInfo() { skt_info_.last_dump_flag = dump_flag_; skt_info_.dump_flag_list.push_back(dump_flag_); skt_info_.op_desc_list.push_back(op_desc_); - skt_info_.dump_args_list.push_back(reinterpret_cast(dump_args_)); + skt_info_.dump_args_list.push_back(reinterpret_cast(skt_dump_args_)); skt_info_.last_group_key = group_key_; - skt_info_.last_dump_args = reinterpret_cast(dump_args_); + skt_info_.last_dump_args = reinterpret_cast(skt_dump_args_); skt_info_.last_op = op_desc_; // last node in a stream, just launch if (IsMarkedLastNode()) { @@ -345,15 +348,15 @@ Status KernelTaskInfo::SuperKernelDistribute() { // 1.launch before ret = SuperKernelLaunch(); if (ret != SUCCESS) { - GELOGE(FAILED, "Call SuperKernelLaunch failed!"); - return FAILED; + GELOGE(ret, "Call SuperKernelLaunch failed!"); + return ret; } // 2.launch current rtError_t rt_ret = rtKernelLaunchWithFlag(stub_func_, block_dim_, args_, args_size_, static_cast(sm_desc_), stream_, dump_flag_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return FAILED; + return rt_ret; } call_save_dump_ = true; UpdateTaskId(); @@ -361,8 +364,8 @@ Status KernelTaskInfo::SuperKernelDistribute() { } else { ret = SaveSuperKernelInfo(); if (ret != SUCCESS) { - GELOGE(FAILED, "Call SuperKernelLaunch failed!"); - return FAILED; + GELOGE(ret, "Call SuperKernelLaunch failed!"); + return ret; } GELOGI("Save Current task [block_dim:%u, size:%zu].", block_dim_, skt_info_.kernel_list.size()); } @@ -403,7 +406,7 @@ Status KernelTaskInfo::Distribute() { } if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - 
return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } // set for task_id_ UpdateTaskId(); @@ -448,9 +451,7 @@ Status KernelTaskInfo::UpdateArgs() { } } - GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), - "update known node %s zero copy addr failed.", op_desc_->GetName().c_str()); - + davinci_model_->SetTotalIOAddrs(io_addrs); GELOGI("KernelTaskInfo::UpdateArgs success."); return SUCCESS; } @@ -459,26 +460,31 @@ Status KernelTaskInfo::Release() { if (davinci_model_ != nullptr && davinci_model_->IsKnownNode()) { return SUCCESS; } - FreeRtMem(&args_); - FreeRtMem(&superkernel_device_args_addr_); - FreeRtMem(&superkernel_dev_nav_table_); - FreeRtMem(&flowtable_); - FreeRtMem(&custom_info_.input_descs); - FreeRtMem(&custom_info_.input_addrs); - FreeRtMem(&custom_info_.output_descs); - FreeRtMem(&custom_info_.output_addrs); - FreeRtMem(&custom_info_.attr_handle); - FreeRtMem(&aicpu_ext_info_addr_); + rtContext_t ctx = nullptr; + rtError_t ret = rtCtxGetCurrent(&ctx); + + if (ret == RT_ERROR_NONE) { + FreeRtMem(&args_); + FreeRtMem(&superkernel_device_args_addr_); + FreeRtMem(&superkernel_dev_nav_table_); + FreeRtMem(&flowtable_); + FreeRtMem(&custom_info_.input_descs); + FreeRtMem(&custom_info_.input_addrs); + FreeRtMem(&custom_info_.output_descs); + FreeRtMem(&custom_info_.output_addrs); + FreeRtMem(&custom_info_.attr_handle); + FreeRtMem(&aicpu_ext_info_addr_); + } if (ctx_.argsOffset != nullptr) { delete[] ctx_.argsOffset; ctx_.argsOffset = nullptr; } - rtError_t ret = (sm_desc_ != nullptr) ? rtMemFreeManaged(sm_desc_) : RT_ERROR_NONE; + ret = (sm_desc_ != nullptr) ? 
rtMemFreeManaged(sm_desc_) : RT_ERROR_NONE; if (ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", static_cast(ret)); - return FAILED; + return RT_ERROR_TO_GE_STATUS(ret); } sm_desc_ = nullptr; @@ -508,13 +514,13 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { rtError_t rt_ret = rtMemAllocManaged(&sm_desc_, sm_desc.size(), RT_MEMORY_SPM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMemcpy(sm_desc_, sm_desc.size(), sm_desc.data(), sm_desc.size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } return SUCCESS; @@ -591,14 +597,14 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } // copy orign args rt_ret = rtMemcpy(args_, args_size_, kernel_def.args().data(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } vector args_info(args_size_); errno_t sec_ret = memcpy_s(args_info.data(), args_size_, kernel_def.args().data(), args_size_); @@ -617,7 +623,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne kAddrLen * tensor_device_addrs.size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } sec_ret = memcpy_s(args_info.data() + offset, args_size_ - offset, tensor_device_addrs.data(), kAddrLen * tensor_device_addrs.size()); @@ -625,16 
+631,17 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); return FAILED; } - + skt_dump_args_ = static_cast(args_) + offset; if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), op_desc->GetName())) { dump_flag_ = RT_KERNEL_DUMPFLAG; dump_args_ = static_cast(args_) + offset; } + Status ge_ret = UpdateL2Data(kernel_def); // update origin l2 data - if (UpdateL2Data(kernel_def) != SUCCESS) { - return RT_FAILED; + if (ge_ret != SUCCESS) { + return ge_ret; } vector virtual_io_addrs; // use virtual address for zero copy key. @@ -698,13 +705,13 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel rtError_t rt_ret = rtMalloc(&custom_info_.attr_handle, op_attr_size, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMemcpy(custom_info_.attr_handle, op_attr_size, buffer.GetData(), op_attr_size, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } // args @@ -731,14 +738,14 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } davinci_model_->SetZeroCopyAddr(op_desc, input_data_addrs, input_data_addrs.data(), custom_info_.input_addrs, @@ -784,7 +791,8 
@@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { ctx_.genVariableBaseSize = davinci_model_->TotalVarMemSize(); ctx_.l2ctrlSize = sm_contrl_size; - if (UpdateCceArgs(sm_desc, flowtable, kernel_def) != SUCCESS) { + ret = UpdateCceArgs(sm_desc, flowtable, kernel_def); + if (ret != SUCCESS) { GELOGE(ret, "update cce args fail"); return ret; } @@ -800,7 +808,7 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { rtError_t rt_ret = rtMalloc(&args_, kernel_def.args_size(), RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "cce task physical memory.", kernel_def.args_size()) @@ -808,7 +816,7 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } // L2 @@ -816,13 +824,13 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { rt_ret = rtMemAllocManaged(&sm_desc_, sm_desc.size(), RT_MEMORY_SPM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMemcpy(sm_desc_, sm_desc.size(), sm_desc.data(), sm_desc.size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } } return SUCCESS; @@ -883,7 +891,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k rtError_t rt_ret = rtMalloc(static_cast(&args_), args_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api(rtMalloc) failed, ret: 
0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "cce task physical memory.", args_size_) @@ -891,7 +899,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), @@ -912,12 +920,12 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); - return FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); - return FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } return SUCCESS; @@ -934,7 +942,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_d rtError_t rt_ret = rtMalloc(&custom_info_.input_descs, sizeof(opTensor_t) * input_size, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } for (std::size_t i = 0; i < input_size; ++i) { @@ -942,7 +950,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_d const_cast(&input_descs[i]), sizeof(opTensor_t), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } 
} @@ -950,7 +958,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_d rt_ret = rtMalloc(&custom_info_.input_addrs, sizeof(opTensor_t) * input_size, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } if (!input_data_addrs.empty()) { @@ -958,7 +966,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_d RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } } @@ -966,14 +974,14 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_d rt_ret = rtMalloc(&custom_info_.output_descs, sizeof(opTensor_t) * output_size, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } for (std::size_t i = 0; i < output_size; ++i) { rt_ret = rtMemcpy(static_cast(custom_info_.output_descs) + i, sizeof(opTensor_t), const_cast(&input_descs[i]), sizeof(opTensor_t), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } } @@ -981,7 +989,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_d rt_ret = rtMalloc(&custom_info_.output_addrs, sizeof(opTensor_t) * output_size, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } if (!output_data_addrs.empty()) { @@ -989,7 +997,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_d RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return 
RT_ERROR_TO_GE_STATUS(rt_ret); } } @@ -1051,8 +1059,8 @@ Status KernelTaskInfo::UpdateCceArgs(std::string &sm_desc, std::string &flowtabl Status status = CceUpdateKernelArgs(context, data_base_addr, weight_base_addr, var_base_addr, sm_desc, flowtable, kernel_def); if (status != SUCCESS) { - GELOGE(FAILED, "Call cce api failed"); - return FAILED; + GELOGE(status, "Call cce api failed"); + return status; } return SUCCESS; } @@ -1118,14 +1126,14 @@ Status KernelTaskInfo::SetFlowtable(std::string &flowtable, const domi::KernelDe rtError_t rt_ret = rtMalloc(&flowtable_, flowtable.size(), RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "flowtable refresh of cce scence.", flowtable.size()) rt_ret = rtMemcpy(flowtable_, flowtable.size(), flowtable.data(), flowtable.size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } // modify flowtable addr in args diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h index 04cd6312..cc8edc07 100644 --- a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h +++ b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h @@ -165,6 +165,7 @@ class KernelTaskInfo : public TaskInfo { void *aicpu_ext_info_addr_ = nullptr; // For super kernel + void *skt_dump_args_ = nullptr; uint32_t skt_id_; std::string stub_func_name_; bool is_l1_fusion_enable_; diff --git a/src/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc index c157b1df..75f6c121 100644 --- a/src/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc +++ 
b/src/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc @@ -59,7 +59,7 @@ Status LabelGotoExTaskInfo::Distribute() { rtError_t rt_ret = rtLabelGotoEx(label_, stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } GELOGI("LabelGotoExTaskInfo Distribute Success."); diff --git a/src/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc index e8888eef..de6a1d65 100644 --- a/src/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc @@ -59,7 +59,7 @@ Status LabelSetTaskInfo::Distribute() { rtError_t rt_ret = rtLabelSet(label_, stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } GELOGI("LabelSetTaskInfo Distribute Success."); diff --git a/src/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc index 162cf00d..efefd3e2 100644 --- a/src/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc @@ -98,13 +98,13 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo rtError_t rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtLabelListCpy(label_list_.data(), label_list_.size(), args_, args_size_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } 
GELOGI("LabelSwitchByIndexTaskInfo Init success, branch max: %u.", branch_max_); diff --git a/src/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc index af32b44f..8cac9f82 100644 --- a/src/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc @@ -19,6 +19,10 @@ #include "framework/common/debug/ge_log.h" #include "graph/load/new_model_manager/davinci_model.h" +namespace { +const uint32_t kAlignBytes = 64; +} + namespace ge { Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { GELOGI("MemcpyAddrAsyncTaskInfo Init Start"); @@ -55,39 +59,40 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel // malloc args memory size_t args_size = sizeof(void *) * io_addrs.size(); - rtError_t rt_ret = rtMalloc(&args_, args_size, RT_MEMORY_HBM); + rtError_t rt_ret = rtMalloc(&args_, args_size + kAlignBytes, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } + args_align_ = reinterpret_cast((reinterpret_cast(args_) / kAlignBytes + 1) * kAlignBytes); // copy orign src/dst - GELOGI("src_args:%p, destMax:%zu, src_:%p, dst_args:%p, dst_:%p, count=%zu", args_, args_size, src_, - static_cast(args_) + args_size, dst_, io_addrs.size()); - rt_ret = rtMemcpy(args_, args_size, io_addrs.data(), args_size, RT_MEMCPY_HOST_TO_DEVICE); + GELOGI("src_args:%p, destMax:%zu, src_:%p, dst_args:%p, dst_:%p, count=%zu", args_align_, args_size, src_, + static_cast(args_align_) + args_size, dst_, io_addrs.size()); + rt_ret = rtMemcpy(args_align_, args_size, io_addrs.data(), args_size, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api for src failed, ret: 0x%X", rt_ret); - return 
RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } count_ = memcpy_async.count(); kind_ = memcpy_async.kind(); dst_max_ = memcpy_async.dst_max(); GELOGI("InitMemcpyAddrAsyncTaskInfo, logic[0x%lx, 0x%lx], src:%p, dst:%p, max:%lu, count:%lu, args:%p, size:%zu", - memcpy_async.src(), memcpy_async.dst(), src_, dst_, dst_max_, count_, args_, args_size); + memcpy_async.src(), memcpy_async.dst(), src_, dst_, dst_max_, count_, args_align_, args_size); - davinci_model->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), args_, args_size, 0); + davinci_model->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), args_align_, args_size, 0); return SUCCESS; } Status MemcpyAddrAsyncTaskInfo::Distribute() { GELOGI("MemcpyAddrAsyncTaskInfo Distribute Start, dst_max:%lu, count:%lu, kind:%u", dst_max_, count_, kind_); - rtError_t rt_ret = rtMemcpyAsync(reinterpret_cast(reinterpret_cast(args_) + sizeof(void *)), - dst_max_, args_, count_, static_cast(kind_), stream_); + rtError_t rt_ret = rtMemcpyAsync(reinterpret_cast(reinterpret_cast(args_align_) + sizeof(void *)), + dst_max_, args_align_, count_, static_cast(kind_), stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } return SUCCESS; diff --git a/src/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h b/src/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h index f8bf8a90..90aad9b7 100644 --- a/src/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h +++ b/src/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h @@ -22,7 +22,8 @@ namespace ge { class MemcpyAddrAsyncTaskInfo : public TaskInfo { public: - MemcpyAddrAsyncTaskInfo() : dst_(nullptr), dst_max_(0), src_(nullptr), args_(nullptr), count_(0), kind_(0) {} + MemcpyAddrAsyncTaskInfo() + : dst_(nullptr), dst_max_(0), src_(nullptr), args_(nullptr), args_align_(nullptr), count_(0), kind_(0) {} 
~MemcpyAddrAsyncTaskInfo() override { src_ = nullptr; @@ -46,6 +47,7 @@ class MemcpyAddrAsyncTaskInfo : public TaskInfo { uint64_t dst_max_; uint8_t *src_; void *args_; + void *args_align_; uint64_t count_; uint32_t kind_; }; diff --git a/src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc index c2b56436..1cc18a85 100644 --- a/src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc @@ -68,7 +68,7 @@ Status MemcpyAsyncTaskInfo::Distribute() { rtError_t rt_ret = rtMemcpyAsync(dst_, dst_max_, src_, count_, static_cast(kind_), stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } GELOGI("MemcpyAsyncTaskInfo Distribute Success"); @@ -102,8 +102,7 @@ Status MemcpyAsyncTaskInfo::UpdateArgs() { io_addrs.emplace_back(reinterpret_cast(src_)); io_addrs.emplace_back(reinterpret_cast(dst_)); - GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), - "update memcpyasync in known node zero copy addr failed."); + davinci_model_->SetTotalIOAddrs(io_addrs); GELOGI("MemcpyAsyncTaskInfo::UpdateArgs success."); return SUCCESS; diff --git a/src/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc index 1232ddb2..fd5f4f4c 100644 --- a/src/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc @@ -47,7 +47,7 @@ Status ProfilerTraceTaskInfo::Distribute() { rtError_t rt_ret = rtProfilerTrace(log_id_, notify_, flat_, stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } 
GELOGI("ProfilerTraceTaskInfo Distribute Success."); diff --git a/src/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc index c30cad09..f48f64e3 100644 --- a/src/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc @@ -74,7 +74,7 @@ Status StreamActiveTaskInfo::Distribute() { rtError_t rt_ret = rtStreamActive(active_stream_, stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } GELOGI("StreamActiveTaskInfo Distribute Success. activeStreamID:%p.", active_stream_); diff --git a/src/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc index 0ebaf573..45db2be5 100644 --- a/src/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc @@ -104,7 +104,7 @@ Status StreamSwitchTaskInfo::Distribute() { rtError_t rt_ret = rtStreamSwitchEx(input_ptr_, cond_, value_ptr_, true_stream_, stream_, data_type_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } GELOGI("StreamSwitchTaskInfo Distribute Success. 
cond:%d, stream:%p, datatype:%d.", cond_, true_stream_, data_type_); diff --git a/src/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc index 01371af7..d134dfdd 100644 --- a/src/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc @@ -22,10 +22,8 @@ #include "graph/load/new_model_manager/model_utils.h" namespace { -const uint32_t kDynamicBtachParamNum = 1; -const uint32_t kDynamicResolutionParamNum = 2; const uint8_t kStreamSwitchnInputNum = 1; -} // namespace +} namespace ge { Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { @@ -45,10 +43,6 @@ Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel * // set size_ input_size_ = stream_switchn_def.size(); - if (input_size_ != kDynamicBtachParamNum && input_size_ != kDynamicResolutionParamNum) { - GELOGE(FAILED, "The size of dynamic batch or imagesize input is 1 or 2, now it is %u.", input_size_); - return FAILED; - } // set value_ptr_ auto value = stream_switchn_def.target_value(); @@ -95,7 +89,7 @@ Status StreamSwitchNTaskInfo::Distribute() { rtStreamSwitchN(input_ptr_, input_size_, value_ptr_, true_stream_ptr_, element_size_, stream_, data_type_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } GELOGI("StreamSwitchNTaskInfo Distribute Success. 
inputSize:%u, elementSize:%d, datatype:%d.", input_size_, diff --git a/src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc index b8fc77ac..100a4fea 100644 --- a/src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc +++ b/src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc @@ -26,13 +26,15 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { reinterpret_cast(reinterpret_cast(this->GetNavTableSize()))}; rtError_t rt_ret = rtMalloc((void **)&(device_args_addr_), sizeof(args), RT_MEMORY_HBM); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failied. error: 0x%X", rt_ret); return FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy((void *)device_args_addr_, sizeof(args), (void *)args, sizeof(args), RT_MEMCPY_HOST_TO_DEVICE); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failied. error: 0x%X", rt_ret); return FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failied. error: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream, dump_flag); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtKernelLaunchWithFlag failied. error: 0x%X", rt_ret); - return FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtKernelLaunchWithFlag failied. 
error: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);) return SUCCESS; } } // namespace skt diff --git a/src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc b/src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc index 397c7d98..ca42b4e2 100644 --- a/src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc +++ b/src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc @@ -33,15 +33,15 @@ Status SuperKernelFactory::Init() { } rtError_t rt_ret; rt_ret = rtGetFunctionByName(this->sk_stub_name_.c_str(), &this->func_stub_); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtGetFunctionByName " "failed. stub_func: %s, please export LD_LIBRARY_PATH for " "libcce_aicore.so", this->sk_stub_name_.c_str()); - return FAILED;) + return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtGetAddrByFun(this->func_stub_, &this->func_ptr_); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtGetAddrByFun failed. error: 0x%X", rt_ret); - return FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtGetAddrByFun failed. error: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);) GELOGD( "SKT: fuseKernels super_kernel_template subFunc %p, device func " "address %p", @@ -94,8 +94,8 @@ Status SuperKernelFactory::FuseKernels(const std::vector &stub_func_list for (unsigned i = 0; i < stub_func_list.size(); i++) { void *sub_device_func = nullptr; rt_ret = rtGetAddrByFun(stub_func_list[i], &sub_device_func); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtGetAddrByFun failed. error: 0x%X", rt_ret); - return FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtGetAddrByFun failed. 
error: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);) GELOGD("SKT: fuseKernels subFunc %p, device func address %p", stub_func_list[i], sub_device_func); // store two uint64_t address // address divided by 4 because of 32bits encoding, call offset will *4 when calculating @@ -105,11 +105,12 @@ Status SuperKernelFactory::FuseKernels(const std::vector &stub_func_list GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]); } rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failed. error: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret); - GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failed. error: 0x%X", rt_ret); + GE_CHK_RT(rtFree(hbm_nav_table_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) // Create the necessary metadata for the super kernel h = std::unique_ptr(new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim)); diff --git a/src/ge/graph/load/new_model_manager/zero_copy_offset.cc b/src/ge/graph/load/new_model_manager/zero_copy_offset.cc new file mode 100644 index 00000000..efe9d6aa --- /dev/null +++ b/src/ge/graph/load/new_model_manager/zero_copy_offset.cc @@ -0,0 +1,218 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/load/new_model_manager/zero_copy_offset.h" + +#include "framework/common/debug/ge_log.h" +#include "framework/common/util.h" +#include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/new_model_manager/zero_copy_task.h" + +namespace ge { +namespace { +const uint32_t kDataIndex = 0; +} // namespace + +ZeroCopyOffset::ZeroCopyOffset() {} + +ZeroCopyOffset::~ZeroCopyOffset() {} + +Status ZeroCopyOffset::InitInputDataInfo(const vector &output_size_list, + const vector &virtual_addr_list, const OpDescPtr &op_desc, + bool &fusion_flag) { + GELOGI("[ZCPY] Start to InitInputDataInfo of %s, total_data_size is %ld, virtual_addr is 0x%lx", + op_desc->GetName().c_str(), output_size_list[kDataIndex], virtual_addr_list[kDataIndex]); + if (output_size_list.empty() || virtual_addr_list.empty() || (output_size_list.size() != virtual_addr_list.size())) { + GELOGE(PARAM_INVALID, "Data[%s] init failed: Output size is %zu, Output addr is %zu", op_desc->GetName().c_str(), + output_size_list.size(), virtual_addr_list.size()); + return PARAM_INVALID; + } + + basic_addr_ = virtual_addr_list[kDataIndex]; + (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_); + (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset_); + GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), return PARAM_INVALID, + "basic_offset_size should be equal to relative_offset_size"); + GELOGI("[ZCPY] zero_copy_basic_offset size is %zu", 
zero_copy_basic_offset_.size()); + + int64_t virtual_addr_offset = op_desc->GetOutputOffset().at(kDataIndex); + GELOGI("virtual_addr_offset is %ld.", virtual_addr_offset); + IsL2Fusion(zero_copy_basic_offset_, virtual_addr_offset, fusion_flag); + + uint32_t out_count = 0; + data_size_ = output_size_list[kDataIndex]; + if (!fusion_flag) { + GELOGI("[ZCPY] %s not set l2_fusion.", op_desc->GetName().c_str()); + out_count++; + data_info_.emplace_back(output_size_list[kDataIndex], virtual_addr_list[kDataIndex]); + relative_offset_.emplace_back(0); + GELOGI("[ZCPY] %s size is %ld, virtual_addr is 0x%lx.", op_desc->GetName().c_str(), output_size_list[kDataIndex], + virtual_addr_list[kDataIndex]); + } else { + GELOGI("[ZCPY] set l2_fusion for %s.", op_desc->GetName().c_str()); + for (size_t index = 0; index < zero_copy_basic_offset_.size(); ++index) { + if (zero_copy_basic_offset_.at(index) == virtual_addr_offset) { + out_count++; + int64_t out_offset = + reinterpret_cast(virtual_addr_list[kDataIndex]) + zero_copy_relative_offset_.at(index); + data_info_.emplace_back(data_size_, reinterpret_cast(out_offset)); + relative_offset_.emplace_back(zero_copy_relative_offset_.at(index)); + GELOGI("[ZCPY] virtual_addr: %p has been l2-fusion to %ld, need copy total_data_size is %ld.", basic_addr_, + out_offset, data_size_); + } + } + } + data_count_ = out_count; + return SUCCESS; +} + +Status ZeroCopyOffset::InitOutputDataInfo(const vector &input_size_list, + const vector &virtual_addr_list, const OpDescPtr &op_desc, + const size_t &idx, bool &fusion_flag) { + GELOGI("[ZCPY] Start to InitOutputDataInfo of %s.", op_desc->GetName().c_str()); + int64_t size = input_size_list[idx]; + auto tensor_desc = op_desc->GetInputDescPtr(idx); + GE_CHECK_NOTNULL(tensor_desc); + if (TensorUtils::GetTensorSizeInBytes(*tensor_desc, size) != GRAPH_SUCCESS) { + GELOGE(FAILED, "GetTensorSizeInBytes failed!"); + return FAILED; + } + + GELOGI("Tensor data size: GetSize=%ld, GetTensorSizeInBytes=%ld", 
input_size_list[idx], size); + + basic_addr_ = virtual_addr_list[idx]; + (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_); + (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset_); + GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), return PARAM_INVALID, + "basic_offset_size should be equal to relative_offset_size"); + int64_t virtual_addr_offset = op_desc->GetInputOffset().at(idx); + GELOGI("virtual_addr_offset is %ld.", virtual_addr_offset); + IsL2Fusion(zero_copy_basic_offset_, virtual_addr_offset, fusion_flag); + + uint32_t in_count = 0; + data_size_ = size; + if (!fusion_flag) { + GELOGI("[ZCPY] %s not set l2-fusion.", op_desc->GetName().c_str()); + in_count++; + data_info_.emplace_back(size, virtual_addr_list[idx]); + // op_desc not set l2fusion when fusion_flag is false + relative_offset_.emplace_back(0); + GELOGI("[ZCPY] %s size is %ld, virtual_addr is 0x%lx.", op_desc->GetName().c_str(), size, virtual_addr_list[idx]); + } else { + GELOGI("[ZCPY] set l2-fusion for %s.", op_desc->GetName().c_str()); + for (size_t index = 0; index < zero_copy_basic_offset_.size(); ++index) { + if (zero_copy_basic_offset_.at(index) == virtual_addr_offset) { + in_count++; + int64_t in_offset = reinterpret_cast(virtual_addr_list[idx]) + zero_copy_relative_offset_.at(index); + data_info_.emplace_back(data_size_, reinterpret_cast(in_offset)); + relative_offset_.emplace_back(zero_copy_relative_offset_.at(index)); + GELOGI("[ZCPY] virtual_addr: %p has been l2-fusion from %ld, need copy total_data_size is %ld.", basic_addr_, + in_offset, data_size_); + } + } + } + data_count_ = in_count; + return SUCCESS; +} + +void ZeroCopyOffset::IsL2Fusion(const vector &fusion_basic_addrs, const int64_t &tensor_offset, + bool &fusion_flag) { + for (size_t fusion_count = 0; fusion_count < fusion_basic_addrs.size(); ++fusion_count) { + if 
(fusion_basic_addrs.at(fusion_count) == tensor_offset) { + fusion_flag = true; + break; + } + } +} + +void ZeroCopyOffset::SetInputOutsideAddrs(const vector &output_offset_list, void *addr, const size_t &index, + bool fusion_flag, std::vector &real_virtual_addrs) { + GELOGI("[ZCPY] Start to SetInputOutsideAddrs for virtual_addr 0x%lx.", addr); + uint32_t out_count = 0; + if (!fusion_flag) { + GELOGI("[ZCPY] not set l2-fusion for virtual_adr 0x%lx.", addr); + out_count++; + std::map> addr_mapping; + addr_mapping[addr] = {}; + outside_addrs_.emplace_back(addr_mapping); + real_virtual_addrs.emplace_back(addr); + } else { + GELOGI("[ZCPY] set l2-fusion for virtual_addr 0x%lx.", addr); + int64_t output_offset = output_offset_list.at(index); + for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) { + if (zero_copy_basic_offset_.at(i) == output_offset) { + out_count++; + void *virtual_addr = + reinterpret_cast(reinterpret_cast(addr) + zero_copy_relative_offset_.at(i)); + std::map> addr_mapping; + addr_mapping[virtual_addr] = {}; + outside_addrs_.emplace_back(addr_mapping); + real_virtual_addrs.emplace_back(virtual_addr); + GELOGI("[ZCPY] virtual_addr %p has been fusion to virtual_addr %p.", addr, virtual_addr); + } + } + } + addr_count_ = out_count; +} + +void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr, + std::vector &tensor_addrs) { + GELOGI("[ZCPY] Start to SetOutputOutsideAddrs for virtual_addr 0x%lx.", addr); + uint32_t out_count = 0; + if (!fusion_flag) { + GELOGI("[ZCPY] not set l2-fusion for virtual_addr 0x%lx.", addr); + out_count++; + std::map> addr_mapping; + addr_mapping[addr] = {}; + outside_addrs_.emplace_back(addr_mapping); + tensor_addrs.emplace_back(addr); + } else { + GELOGI("[ZCPY] set l2-fusion for virtual_addr 0x%lx.", addr); + for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) { + if (zero_copy_basic_offset_.at(i) == input_offset) { + out_count++; + void *virtual_addr = + 
reinterpret_cast(reinterpret_cast(addr) + zero_copy_relative_offset_.at(i)); + std::map> addr_mapping; + addr_mapping[virtual_addr] = {}; + outside_addrs_.emplace_back(addr_mapping); + tensor_addrs.emplace_back(virtual_addr); + GELOGI("[ZCPY] virtual_addr %p has been fusion to virtual_addr %p.", addr, virtual_addr); + } + } + } + addr_count_ = out_count; +} + +bool ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { + const auto addr_val = reinterpret_cast(outside_addr); + bool set_batch_label_flag = false; + for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) { + auto &addrs_mapping_list = GetOutsideAddrs(); + auto args_addrs = addrs_mapping_list[out_count].find(outside_addr); + if (args_addrs != addrs_mapping_list[out_count].end()) { + GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid."); + void *args_val = static_cast(args) + offset; + args_addrs->second.push_back(args_val); + GELOGI("[ZCPY] set copy input: virtual_addr: %p, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val, args, + offset); + set_batch_label_flag = true; + } + } + return set_batch_label_flag; +} + +} // namespace ge diff --git a/src/ge/graph/load/new_model_manager/zero_copy_offset.h b/src/ge/graph/load/new_model_manager/zero_copy_offset.h new file mode 100644 index 00000000..eb2cdb4d --- /dev/null +++ b/src/ge/graph/load/new_model_manager/zero_copy_offset.h @@ -0,0 +1,84 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_ +#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_ + +#include +#include +#include +#include + +#include "external/ge/ge_api_error_codes.h" +#include "framework/common/ge_types.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/load/new_model_manager/zero_copy_task.h" +#include "graph/utils/attr_utils.h" +#include "graph/utils/tensor_utils.h" +#include "runtime/mem.h" +#include "task_info/task_info.h" + +using std::map; +using std::set; +using std::string; +using std::vector; + +namespace ge { +class ZeroCopyOffset { + public: + ZeroCopyOffset(); + ~ZeroCopyOffset(); + + Status InitInputDataInfo(const vector &output_size_list, const vector &virtual_addr_list, + const OpDescPtr &op_desc, bool &fusion_flag); + void SetInputOutsideAddrs(const vector &output_offset_list, void *addr, const size_t &index, + bool fusion_flag, std::vector &real_virtual_addrs); + + void IsL2Fusion(const vector &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag); + Status InitOutputDataInfo(const vector &input_size_list, const vector &virtual_addr_list, + const OpDescPtr &op_desc, const size_t &idx, bool &fusion_flag); + void SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr, + std::vector &tensor_addrs); + bool SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset); + + // basic_addr of l2-fusion + void *GetBasicAddr() const { return basic_addr_; } + // total num of 
out_of_data/in_of_phonyconcat + uint32_t GetDataCount() const { return data_count_; } + uint32_t GetAddrCount() const { return addr_count_; } + // value of *data_info_ from davinci_model + std::vector> GetDataInfo() const { return data_info_; } + // relative_offset from zero_copy_relative_offset_ + std::vector GetRelativeOffset() const { return relative_offset_; } + // data_size of Data/Netoutput + int64_t GetDataSize() const { return data_size_; } + // value of *outside_addrs_ from davinci_model + std::vector>> &GetOutsideAddrs() { return outside_addrs_; } + + private: + void *basic_addr_ = nullptr; + uint32_t data_count_ = 0; + std::vector> data_info_; + vector relative_offset_; + int64_t data_size_ = 0; + uint32_t addr_count_ = 0; + std::vector>> outside_addrs_; + + std::vector zero_copy_basic_offset_; + std::vector zero_copy_relative_offset_; +}; +} // namespace ge +#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_ \ No newline at end of file diff --git a/src/ge/graph/load/new_model_manager/zero_copy_task.cc b/src/ge/graph/load/new_model_manager/zero_copy_task.cc index be75322d..408192ab 100644 --- a/src/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/src/ge/graph/load/new_model_manager/zero_copy_task.cc @@ -16,9 +16,9 @@ #include "graph/load/new_model_manager/zero_copy_task.h" -#include "graph/load/new_model_manager/model_utils.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" +#include "graph/load/new_model_manager/model_utils.h" namespace ge { const char *const kDefaultBatchLable = "Batch_default"; @@ -48,8 +48,8 @@ Status ZeroCopyTask::SetTaskArgsOffset(uintptr_t addr, size_t offset) { it->second.push_back(offset); } - GELOGI("[ZCPY] %s set task, addr: 0x%lx, args: %p, size: %zu, offset: %zu", name_.c_str(), addr, args_addr_, - args_size_, offset); + GELOGI("[ZCPY] %s set task, virtual_addr: 0x%lx, args_addr: %p, size: %zu, offset: %zu", name_.c_str(), addr, + args_addr_, args_size_, offset); return 
SUCCESS; } @@ -65,7 +65,8 @@ void ZeroCopyTask::SetOriginalArgs(const void *info, size_t size) { const uint8_t *data = static_cast(info); args_info_.assign(data, data + size); - GELOGI("[ZCPY] %s set info, args: %p, args size: %zu, info size: %zu", name_.c_str(), args_addr_, args_size_, size); + GELOGI("[ZCPY] %s set info from virtual_addr: 0x%lx, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), + info, args_addr_, args_size_, size); } /** @@ -110,13 +111,13 @@ bool ZeroCopyTask::CheckDynamicBatch(const map> &batch_ad * @ingroup ge * @brief Set user data addr to Task param. * @param [in] addr: virtual address value from Op. - * @param [in] data: data buffer from user. + * @param [in] buffer_addr: real_data_buffer_addr from user. * @param [in] batch_addrs: dynamic batch addr info. * @param [in] batch_label: batch label. * @return: void */ -Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, const DataBuffer &data, - const map> &batch_addrs, const string &batch_label) { +Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map> &batch_addrs, + const string &batch_label) { for (auto pair : task_addr_offset_) { if (pair.first != addr) { continue; @@ -128,9 +129,9 @@ Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, const DataBuffer &data, continue; } - auto dst_addr = static_cast(data.data); - GELOGI("[ZCPY] %s update task, args: %p, size: %zu, offset: %zu, addr: 0x%lx, length: %u", name_.c_str(), - args_addr_, args_size_, offset, addr, data.length); + auto dst_addr = static_cast(buffer_addr); + GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx", name_.c_str(), + args_addr_, args_size_, offset, addr); *(uintptr_t *)(args_info + offset) = reinterpret_cast(dst_addr); is_updated_ = true; } @@ -162,11 +163,11 @@ Status ZeroCopyTask::DistributeParam(rtStream_t stream) { } if (rt_err != RT_ERROR_NONE) { - GELOGE(FAILED, "[ZCPY] %s distribute task param failed, error=0x%x", name_.c_str(), 
rt_err); - return FAILED; + GELOGE(RT_FAILED, "[ZCPY] %s distribute task param failed, error=0x%x", name_.c_str(), rt_err); + return RT_ERROR_TO_GE_STATUS(rt_err); } - GELOGI("[ZCPY] %s refresh task args success, args: %p, size: %zu, args_info_: %p, length: %zu", name_.c_str(), + GELOGI("[ZCPY] %s refresh task args success, args_addr: %p, size: %zu, args_info_: %p, length: %zu", name_.c_str(), args_addr_, args_size_, args_info_.data(), args_info_.size()); return SUCCESS; } diff --git a/src/ge/graph/load/new_model_manager/zero_copy_task.h b/src/ge/graph/load/new_model_manager/zero_copy_task.h index 9d3f5b03..d2a91ce7 100644 --- a/src/ge/graph/load/new_model_manager/zero_copy_task.h +++ b/src/ge/graph/load/new_model_manager/zero_copy_task.h @@ -66,12 +66,12 @@ class ZeroCopyTask { * @ingroup ge * @brief Set user data addr to Task param. * @param [in] addr: virtual address value from Op. - * @param [in] data: data buffer from user. + * @param [in] buffer_addr: data buffer_addr from user. * @param [in] batch_addrs: dynamic batch addr info. * @param [in] batch_label: batch label. * @return: 0 SUCCESS / others FAILED */ - ge::Status UpdateTaskParam(uintptr_t addr, const DataBuffer &data, const map> &batch_addrs, + ge::Status UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map> &batch_addrs, const string &batch_label); /** diff --git a/src/ge/graph/manager/block_memory.h b/src/ge/graph/manager/block_memory.h new file mode 100644 index 00000000..e2bf74b2 --- /dev/null +++ b/src/ge/graph/manager/block_memory.h @@ -0,0 +1,43 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_MANAGER_BLOCK_MEMORY_H_ +#define GE_GRAPH_MANAGER_BLOCK_MEMORY_H_ +namespace ge { +struct Block; +typedef bool (*Comparison)(const Block *, const Block *); +using BlockBin = std::set; + +struct Block { + uint32_t device_id; // npu device id + size_t size; // block size in bytes + BlockBin *bin; // owning block bin + uint8_t *ptr; // memory address + bool allocated; // in-use flag + Block *prev; // prev block if split from a larger allocation + Block *next; // next block if split from a larger allocation + + Block(uint32_t device, size_t size, BlockBin *bin, uint8_t *ptr) + : device_id(device), size(size), bin(bin), ptr(ptr), allocated(false), prev(nullptr), next(nullptr) {} + + // constructor for search key + Block(uint32_t device, size_t size, uint8_t *ptr) + : device_id(device), size(size), bin(nullptr), ptr(ptr), allocated(false), prev(nullptr), next(nullptr) {} + + bool IsSplit() const { return (prev != nullptr) || (next != nullptr); } +}; +} // namespace ge +#endif // GE_GRAPH_MANAGER_BLOCK_MEMORY_H_ diff --git a/src/ge/graph/manager/graph_caching_allocator.cc b/src/ge/graph/manager/graph_caching_allocator.cc index cbeafa3f..4ba39ca8 100644 --- a/src/ge/graph/manager/graph_caching_allocator.cc +++ b/src/ge/graph/manager/graph_caching_allocator.cc @@ -134,11 +134,6 @@ uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device } if (ptr == nullptr) { GELOGE(FAILED, "Malloc failed device id = %u, size= %zu", device_id, size); - } else { - std::lock_guard lock(mutex_); - block->allocated = true; - 
allocated_blocks_[block->ptr] = block; - GELOGI("Malloc device id = %u, size= %zu", device_id, size); } return ptr; } @@ -222,9 +217,16 @@ Block *CachingAllocator::FindFreeBlock(size_t size, uint8_t *org_ptr, uint32_t d if (block != nullptr) { GELOGI("Find block size = %zu", block->size); if (ShouldSplit(block, size)) { - return SplitBlock(block, size, *bin, device_id); + block = SplitBlock(block, size, *bin, device_id); + } + + if (block->ptr != nullptr) { + block->allocated = true; + allocated_blocks_[block->ptr] = block; + GELOGI("Malloc device id = %u, size= %zu", device_id, size); } } + return block; } return nullptr; diff --git a/src/ge/graph/manager/graph_manager.cc b/src/ge/graph/manager/graph_manager.cc index bfd09c72..361b905d 100644 --- a/src/ge/graph/manager/graph_manager.cc +++ b/src/ge/graph/manager/graph_manager.cc @@ -43,7 +43,9 @@ #include "graph/manager/util/rt_context_util.h" #include "graph/partition/dynamic_shape_partition.h" #include "graph/passes/addn_pass.h" +#include "graph/passes/bitcast_pass.h" #include "graph/passes/atomic_addr_clean_pass.h" +#include "graph/passes/attach_stream_label_pass.h" #include "graph/passes/cast_remove_pass.h" #include "graph/passes/common_subexpression_elimination_pass.h" #include "graph/passes/compile_nodes_pass.h" @@ -58,13 +60,17 @@ #include "graph/passes/hccl_group_pass.h" #include "graph/passes/hccl_memcpy_pass.h" #include "graph/passes/identity_pass.h" +#include "graph/passes/input_output_connection_identify_pass.h" #include "graph/passes/iterator_op_pass.h" #include "graph/passes/link_gen_mask_nodes_pass.h" +#include "graph/passes/mark_graph_unknown_status_pass.h" #include "graph/passes/merge_pass.h" +#include "graph/passes/merge_to_stream_merge_pass.h" #include "graph/passes/multi_batch_pass.h" #include "graph/passes/next_iteration_pass.h" #include "graph/passes/permute_pass.h" #include "graph/passes/prune_pass.h" +#include "graph/passes/ref_identity_delete_op_pass.h" #include 
"graph/passes/replace_with_empty_const_pass.h" #include "graph/passes/reshape_recovery_pass.h" #include "graph/passes/reshape_remove_pass.h" @@ -73,9 +79,7 @@ #include "graph/passes/switch_data_edges_bypass.h" #include "graph/passes/switch_dead_branch_elimination.h" #include "graph/passes/switch_logic_remove_pass.h" -#include "graph/passes/merge_to_stream_merge_pass.h" #include "graph/passes/switch_to_stream_switch_pass.h" -#include "graph/passes/attach_stream_label_pass.h" #include "graph/passes/transop_breadth_fusion_pass.h" #include "graph/passes/transop_depth_fusion_pass.h" #include "graph/passes/transop_nearby_allreduce_fusion_pass.h" @@ -84,9 +88,9 @@ #include "graph/passes/transpose_transdata_pass.h" #include "graph/passes/variable_op_pass.h" #include "graph/passes/variable_prepare_op_pass.h" -#include "graph/passes/ref_identity_delete_op_pass.h" #include "graph/passes/variable_ref_delete_op_pass.h" #include "graph/passes/variable_ref_useless_control_out_delete_pass.h" +#include "graph/passes/end_of_sequence_add_control_pass.h" #include "graph/utils/tensor_adapter.h" #include "inc/pass_manager.h" #include "init/gelib.h" @@ -701,6 +705,58 @@ Status GraphManager::GenerateInfershapeGraph(GraphId &graph_id) { return ret; } +Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const std::vector &inputs, + GeRootModelPtr &ge_root_model, uint64_t session_id) { + // find graph + GraphNodePtr graph_node = nullptr; + Status ret = GetGraphNode(graph_id, graph_node); + if (ret != SUCCESS) { + GELOGE(ret, "[BuildGraph] graph not exist, graph_id = %u.", graph_id); + return ret; + } + + if (graph_node == nullptr) { + GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[BuildGraph] graph node is NULL, graphId = %u.", graph_id); + return GE_GRAPH_GRAPH_NODE_NULL; + } + auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); + GE_CHECK_NOTNULL(compute_graph); + + GM_RUN_AND_DUMP_PERF("Prepare", graph_preparer_.PrepareDynShape, graph_node->GetGraph(), 
inputs, compute_graph, + session_id); + + for (auto &node : compute_graph->GetAllNodes()) { + OpDescPtr op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + if (op_desc->HasAttr(ATTR_NAME_UNREGST_OPPATH)) { + vector node_vec = {node}; + + auto instance_ptr = ge::GELib::GetInstance(); + if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GE is not initialized"); + return GE_CLI_GE_NOT_INITIALIZED; + } + + OpsKernelInfoStorePtr kernel_info = + instance_ptr->OpsKernelManagerObj().GetOpsKernelInfoStore(op_desc->GetOpKernelLibName()); + if (kernel_info == nullptr) { + GELOGE(FAILED, "Get op kernel info store failed"); + return FAILED; + } + + ret = kernel_info->CompileOp(node_vec); + if (ret != SUCCESS) { + GELOGE(ret, "Compile op failed, op = %s, graph_id = %u.", op_desc->GetName().c_str(), graph_id); + return ret; + } + } + } + + GM_RUN_AND_DUMP_PERF("Build", Build, graph_node, compute_graph, ge_root_model, session_id); + + return SUCCESS; +} + Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector &inputs, GeRootModelPtr &ge_root_model, uint64_t session_id, bool async) { GELOGI("[BuildGraph] start to build graph, graph_id=%u.", graph_id); @@ -1711,9 +1767,11 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { ConstantFoldingPass constant_folding_pass; ReshapeRemovePass reshape_remove_pass; CondRemovePass condition_remove_pass; + BitcastPass bitcast_pass; names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass); + names_to_passes.emplace_back("BitcastPass", &bitcast_pass); GE_TIMESTAMP_START(names_to_passes); ret = GEPass(compute_graph).Run(names_to_passes); GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses"); @@ -1750,19 +1808,31 @@ Status 
GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { new (std::nothrow) VariableRefDeleteOpPass)) GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::CompileNodesPass", new (std::nothrow) CompileNodesPass)) + GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass( + "OptimizeStage2::AfterMergePasses::MarkGraphUnknownStatusPass", new (std::nothrow) MarkGraphUnknownStatusPass)) + GE_CHK_STATUS_RET( + pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::InputOutputConnectionIdentifyPass", + new (std::nothrow) InputOutputConnectionIdentifyPass)) // When the input node to be cleared is after a `Data` node, the atomic-clean-node should not be inserted. // So The ComputeGraph should not delete nodes after `AtomicAddrCleanPass` // to prevent unexpected deletion of nodes after a `Data` node GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::AtomicAddrCleanPass", new (std::nothrow) AtomicAddrCleanPass)) + GE_CHK_STATUS_RET( + pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::" + "EndOfSequenceAddControlPass", + new (std::nothrow) EndOfSequenceAddControlPass)) const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); if (unknown_shape_skip == nullptr) { // SubgraphPass solves memory_assign_conflicts by insert MemcpyAsync node, which depends on multi attrs and // graph-structure. So try not to add new pass after SubgraphPass. 
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::SubgraphPass", - new (std::nothrow) SubgraphPass)); + new (std::nothrow) SubgraphPass)) } + // AttachStreamLabelPass modifies attr without changing structure of compute_graph + GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::AttachStreamLabelPass", + new (std::nothrow) AttachStreamLabelPass)) GE_TIMESTAMP_START(pass_for_control_attr_optimize); ret = pass_for_control_attr_optimize.Run(compute_graph); @@ -1840,6 +1910,8 @@ Status GraphManager::OptimizeAfterMergeSubGraph(ge::ComputeGraphPtr &compute_gra after_merge_fusion_passes.AddPass("VariableRefDeleteOpPass", new (std::nothrow) VariableRefDeleteOpPass)); GE_CHK_STATUS_RET(after_merge_fusion_passes.AddPass("SameTransdataBreadthFusionPass", new (std::nothrow) SameTransdataBreadthFusionPass)); + GE_CHK_STATUS_RET( + after_merge_fusion_passes.AddPass("MarkGraphUnknownStatusPass", new (std::nothrow) MarkGraphUnknownStatusPass)); GE_CHK_STATUS_RET(after_merge_fusion_passes.AddPass("AtomicAddrCleanPass", new (std::nothrow) AtomicAddrCleanPass)); GE_CHK_STATUS_RET(after_merge_fusion_passes.AddPass( "LinkGenMaskNodesPass", new (std::nothrow) LinkGenMaskNodesPass(options_.stream_max_parallel_num))); diff --git a/src/ge/graph/manager/graph_manager.h b/src/ge/graph/manager/graph_manager.h index fd9542e8..681efac8 100644 --- a/src/ge/graph/manager/graph_manager.h +++ b/src/ge/graph/manager/graph_manager.h @@ -102,6 +102,9 @@ class GraphManager { ge::Status BuildGraph(const GraphId &graph_id, const std::vector &inputs, GeRootModelPtr &models, uint64_t session_id = 0, bool async = false); + Status BuildGraphForUnregisteredOp(const GraphId &graph_id, const std::vector &inputs, + GeRootModelPtr &ge_root_model, uint64_t session_id); + /// /// @ingroup ge_graph /// @brief Save extra attribute to Model diff --git a/src/ge/graph/manager/host_mem_manager.cc 
b/src/ge/graph/manager/host_mem_manager.cc new file mode 100644 index 00000000..1d35f7af --- /dev/null +++ b/src/ge/graph/manager/host_mem_manager.cc @@ -0,0 +1,86 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/manager/host_mem_manager.h" + +#include + +#include "graph/utils/tensor_utils.h" + +namespace ge { +Status HostMemoryAllocator::Allocate(std::size_t memory_size, uint8_t *memory_addr) { + GELOGI("HostMemoryAllocator::MallocMemory size= %zu.", memory_size); + return SUCCESS; +} + +Status HostMemoryAllocator::DeAllocate(uint8_t *memory_addr) { + if (rtFreeHost(memory_addr) != RT_ERROR_NONE) { + GELOGE(GE_GRAPH_FREE_FAILED, "MemoryAllocator::Free memory failed."); + return GE_GRAPH_FREE_FAILED; + } + memory_addr = nullptr; + return ge::SUCCESS; +} + +HostMemManager &HostMemManager::Instance() { + static HostMemManager mem_manager; + return mem_manager; +} + +Status HostMemManager::Initialize() { + std::lock_guard lock(mutex_); + allocator_ = std::unique_ptr(new (std::nothrow) HostMemoryAllocator()); + if (allocator_ == nullptr) { + GELOGE(GE_GRAPH_MALLOC_FAILED, "Host mem allocator init failed!"); + return GE_GRAPH_MALLOC_FAILED; + } + return SUCCESS; +} + +void HostMemManager::Finalize() noexcept { + std::lock_guard lock(mutex_); + + for (const auto &it : var_memory_base_map_) { + if (allocator_->DeAllocate(it.second.address) != SUCCESS) { + GELOGW("Host %s mem 
deAllocator failed!", it.first.c_str()); + } + } + var_memory_base_map_.clear(); +} + +Status HostMemManager::MallocMemoryForHostVar(const string &op_name, uint64_t tensor_size, uint8_t *&var_addr) { + std::lock_guard lock(mutex_); + if (var_memory_base_map_.find(op_name) != var_memory_base_map_.end()) { + GELOGI("Host mem for variable %s has been malloced", op_name.c_str()); + return SUCCESS; + } + GE_CHECK_NOTNULL(allocator_); + GE_CHK_STATUS(allocator_->Allocate(tensor_size, var_addr)); + HostMemInfo info(var_addr, tensor_size); + var_memory_base_map_[op_name] = info; + return SUCCESS; +} + +Status HostMemManager::QueryVarMemInfo(const string &op_name, uint64_t &base_addr, uint64_t &data_size) { + if (var_memory_base_map_.find(op_name) == var_memory_base_map_.end()) { + GELOGE(INTERNAL_ERROR, "Find host base base_addr failed,node name:%s!", op_name.c_str()); + return INTERNAL_ERROR; + } + base_addr = reinterpret_cast(reinterpret_cast(var_memory_base_map_[op_name].address)); + data_size = var_memory_base_map_[op_name].data_size; + return SUCCESS; +} +} // namespace ge diff --git a/src/ge/graph/manager/host_mem_manager.h b/src/ge/graph/manager/host_mem_manager.h new file mode 100644 index 00000000..3a5a0602 --- /dev/null +++ b/src/ge/graph/manager/host_mem_manager.h @@ -0,0 +1,73 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_GRAPH_MANAGER_HOST_VAR_MANAGER_H_ +#define GE_GRAPH_MANAGER_HOST_VAR_MANAGER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "framework/common/ge_inner_error_codes.h" +#include "framework/common/ge_types.h" +#include "framework/common/l2_cache_optimize.h" +#include "graph/ge_tensor.h" +#include "graph/op_desc.h" +#include "graph/tensor.h" +#include "runtime/mem.h" + +namespace ge { +class HostMemoryAllocator { + public: + ~HostMemoryAllocator() = default; + + Status Allocate(std::size_t size, uint8_t *memory_addr); + Status DeAllocate(uint8_t *memory_addr); +}; + +struct HostMemInfo { + uint8_t *address; + uint64_t data_size; + HostMemInfo() : address(nullptr), data_size(0) {} + HostMemInfo(uint8_t *addr, uint64_t size) : address(addr), data_size(size) {} +}; + +class HostMemManager { + public: + HostMemManager() = default; + ~HostMemManager() { Finalize(); } + HostMemManager(const HostMemManager &) = delete; + HostMemManager &operator=(const HostMemManager &) = delete; + + static HostMemManager &Instance(); + Status Initialize(); + void Finalize() noexcept; + Status MallocMemoryForHostVar(const string &op_name, uint64_t tensor_size, uint8_t *&var_addr); + Status QueryVarMemInfo(const string &op_name, uint64_t &base_addr, uint64_t &data_size); + + private: + std::unordered_map var_memory_base_map_; + std::unique_ptr allocator_; + mutable std::recursive_mutex mutex_; +}; +} // namespace ge + +#endif // GE_GRAPH_MANAGER_HOST_VAR_MANAGER_H_ diff --git a/src/ge/graph/manager/memory_api.cc b/src/ge/graph/manager/memory_api.cc new file mode 100644 index 00000000..0a98e983 --- /dev/null +++ b/src/ge/graph/manager/memory_api.cc @@ -0,0 +1,45 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "framework/memory/memory_api.h" + +#include + +#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/host_mem_manager.h" +#include "graph/manager/rdma_pool_allocator.h" +#include "hccl/base.h" +#include "hccl/hcom.h" + +namespace ge { +Status InitRdmaPool(size_t size, rtMemType_t mem_type) { + GELOGD("InitRdmaPool in"); + return MemManager::Instance().RdmaPoolInstance(mem_type).InitMemory(size); +} + +Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t mem_type) { + GELOGD("Start to register rdma memory with host var size %zu", var_info.size()); + uint64_t device_base = 0; + uint64_t device_size = 0; + GE_CHK_STATUS_RET(MemManager::Instance().RdmaPoolInstance(mem_type).GetBaseAddr(device_base, device_size)); + return SUCCESS; +} + +Status GetVarBaseAddrAndSize(const string &var_name, uint64_t &base_addr, uint64_t &var_size) { + GELOGD("GetVarBaseAddrAndSize in"); + return HostMemManager::Instance().QueryVarMemInfo(var_name, base_addr, var_size); +} +} // namespace ge \ No newline at end of file diff --git a/src/ge/graph/manager/rdma_pool_allocator.cc b/src/ge/graph/manager/rdma_pool_allocator.cc new file mode 100644 index 00000000..1daeafb8 --- /dev/null +++ b/src/ge/graph/manager/rdma_pool_allocator.cc @@ -0,0 +1,179 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/manager/rdma_pool_allocator.h" +#include "framework/common/debug/ge_log.h" +#include "graph/manager/graph_mem_allocator.h" + +namespace { +const size_t kAlignedSize = 512; +const float kSplitThreshold = 0.5; + +inline size_t GetAlignedBlockSize(size_t size) { + if (size == 0) { + return kAlignedSize; + } + return kAlignedSize * ((size + kAlignedSize - 1) / kAlignedSize); +} + +inline bool ShouldSplit(const ge::Block *block, size_t size) { + return static_cast(size) <= (static_cast(block->size) * kSplitThreshold); +} + +inline bool CanMerge(ge::Block *block) { return block != nullptr && !block->allocated; } +} // namespace + +namespace ge { +RdmaPoolAllocator::RdmaPoolAllocator(rtMemType_t memory_type) + : memory_type_(memory_type), block_bin_(BlockBin([](const Block *left, const Block *right) { + if (left->size != right->size) { + return left->size < right->size; + } + return reinterpret_cast(left->ptr) < reinterpret_cast(right->ptr); + })) {} + +Status RdmaPoolAllocator::Initialize() { + memory_allocator_ = MemManager::Instance(memory_type_); + if (memory_allocator_ == nullptr) { + return ge::FAILED; + } + return ge::SUCCESS; +} +void RdmaPoolAllocator::Finalize() { + for (auto it = allocated_blocks_.begin(); it != allocated_blocks_.end();) { + auto block = it->second; + allocated_blocks_.erase(it); + delete block; + } + for (auto it = block_bin_.begin(); it != block_bin_.end();) { + auto block = *it; + block_bin_.erase(it); + delete block; + } + + if (rdma_base_addr_ != nullptr) { + if 
(memory_allocator_->FreeMemory(rdma_base_addr_) != SUCCESS) { + GELOGW("Free rdma pool memory failed"); + } + } +} + +Status RdmaPoolAllocator::InitMemory(size_t mem_size, uint32_t device_id) { + if (rdma_base_addr_ != nullptr) { + GELOGE(GE_MULTI_INIT, "Rdma pool has been malloced"); + return GE_MULTI_INIT; + } + const std::string purpose = "Memory for rdma pool."; + std::lock_guard lock(mutex_); + rdma_base_addr_ = memory_allocator_->MallocMemory(purpose, mem_size, device_id); + if (rdma_base_addr_ == nullptr) { + GELOGE(GE_GRAPH_MALLOC_FAILED, "Rdma pool memory malloc failed"); + return GE_GRAPH_MALLOC_FAILED; + } + rdma_mem_size_ = mem_size; + // Init with a base block. + auto *base_block = new (std::nothrow) Block(device_id, mem_size, rdma_base_addr_); + if (base_block == nullptr) { + GELOGE(GE_GRAPH_MALLOC_FAILED, "Block malloc failed"); + return GE_GRAPH_MALLOC_FAILED; + } + block_bin_.insert(base_block); + return SUCCESS; +} + +uint8_t *RdmaPoolAllocator::Malloc(size_t size, uint32_t device_id) { + auto aligned_size = GetAlignedBlockSize(size); + Block key(device_id, aligned_size, nullptr); + std::lock_guard lock(mutex_); + auto it = block_bin_.lower_bound(&key); + if (it != block_bin_.end()) { + Block *block = *it; + block_bin_.erase(it); + block->allocated = true; + if (block->ptr == nullptr) { + GELOGE(INTERNAL_ERROR, "Rdmapool memory address is nullptr."); + return nullptr; + } + allocated_blocks_.emplace(block->ptr, block); + GELOGI("Find block size = %zu", block->size); + + if (ShouldSplit(block, aligned_size)) { + auto *new_block = + new (std::nothrow) Block(device_id, block->size - aligned_size, nullptr, block->ptr + aligned_size); + if (new_block == nullptr) { + GELOGW("Block split failed"); + return block->ptr; + } + new_block->next = block->next; + if (block->next != nullptr) { + block->next->prev = new_block; + } + new_block->prev = block; + block->next = new_block; + block->size = aligned_size; + block_bin_.insert(new_block); + } + return 
block->ptr; + } + return nullptr; +} + +Status RdmaPoolAllocator::Free(uint8_t *memory_addr, uint32_t device_id) { + GELOGI("Free device id = %u", device_id); + if (memory_addr == nullptr) { + GELOGE(GE_GRAPH_FREE_FAILED, "Invalid memory pointer"); + return GE_GRAPH_FREE_FAILED; + } + + std::lock_guard lock(mutex_); + auto it = allocated_blocks_.find(memory_addr); + if (it == allocated_blocks_.end()) { + GELOGE(PARAM_INVALID, "Invalid memory pointer"); + return PARAM_INVALID; + } + Block *block = it->second; + block->allocated = false; + allocated_blocks_.erase(it); + block_bin_.insert(block); + // Each time merge with its pre and next. + MergeBlockNearby(block, block->next); + MergeBlockNearby(block->prev, block); + return SUCCESS; +} + +void RdmaPoolAllocator::MergeBlockNearby(Block *pre_block, Block *block) { + if (!(CanMerge(pre_block) && CanMerge(block))) { + return; + } + pre_block->size += block->size; + pre_block->next = block->next; + if (block->next != nullptr) { + block->next->prev = pre_block; + } + block_bin_.erase(block); + delete block; +} + +Status RdmaPoolAllocator::GetBaseAddr(uint64_t &base_addr, uint64_t &mem_size) { + if (rdma_base_addr_ == nullptr) { + GELOGE(INTERNAL_ERROR, "Rdma base addr is nullptr."); + return INTERNAL_ERROR; + } + base_addr = reinterpret_cast(reinterpret_cast(rdma_base_addr_)); + mem_size = rdma_mem_size_; + return SUCCESS; +} +} // namespace ge diff --git a/src/ge/graph/manager/rdma_pool_allocator.h b/src/ge/graph/manager/rdma_pool_allocator.h new file mode 100644 index 00000000..59d33916 --- /dev/null +++ b/src/ge/graph/manager/rdma_pool_allocator.h @@ -0,0 +1,71 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_MANAGER_RDMA_POOL_ALLOCATOR_H_ +#define GE_GRAPH_MANAGER_RDMA_POOL_ALLOCATOR_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "framework/common/ge_inner_error_codes.h" +#include "graph/manager/block_memory.h" +#include "graph/node.h" +#include "runtime/mem.h" + +namespace ge { +class MemoryAllocator; + +class RdmaPoolAllocator { + public: + explicit RdmaPoolAllocator(rtMemType_t memory_type); + + RdmaPoolAllocator(const RdmaPoolAllocator &) = delete; + + RdmaPoolAllocator &operator=(const RdmaPoolAllocator &) = delete; + + ~RdmaPoolAllocator() { Finalize(); } + + Status Initialize(); + void Finalize(); + + Status InitMemory(size_t mem_size, uint32_t device_id = 0); + + uint8_t *Malloc(size_t size, uint32_t device_id = 0); + + Status Free(uint8_t *memory_addr, uint32_t device_id = 0); + + Status GetBaseAddr(uint64_t &base_addr, uint64_t &mem_size); + + private: + void MergeBlockNearby(Block *pre_block, Block *block); + + rtMemType_t memory_type_; + size_t rdma_mem_size_ = 0; // Total rdma memory size to be allocated. + uint8_t *rdma_base_addr_ = nullptr; + MemoryAllocator *memory_allocator_ = nullptr; + BlockBin block_bin_; // Save all rdma blocks. 
+ std::unordered_map allocated_blocks_; + // lock around all operations + mutable std::recursive_mutex mutex_; +}; +} // namespace ge + +#endif // GE_GRAPH_MANAGER_RDMA_POOL_ALLOCATOR_H_ diff --git a/src/ge/graph/manager/trans_var_data_utils.cc b/src/ge/graph/manager/trans_var_data_utils.cc index 3f346c91..60a0d0db 100644 --- a/src/ge/graph/manager/trans_var_data_utils.cc +++ b/src/ge/graph/manager/trans_var_data_utils.cc @@ -444,7 +444,7 @@ Status TransVarDataUtils::TransAllVarData(const vector &variable_nodes, rtError_t rt_ret = rtCtxSetCurrent(ctx); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Failed to set context, error_code is: 0x%X.", rt_ret); - return RT_FAILED; + return RT_ERROR_TO_GE_STATUS(rt_ret); } uint32_t allocated_graph_id = 0; Status ret = VarManager::Instance(session_id)->GetAllocatedGraphId(node->GetName(), allocated_graph_id); diff --git a/src/ge/graph/partition/dynamic_shape_partition.cc b/src/ge/graph/partition/dynamic_shape_partition.cc index 324129c4..903159b9 100644 --- a/src/ge/graph/partition/dynamic_shape_partition.cc +++ b/src/ge/graph/partition/dynamic_shape_partition.cc @@ -103,26 +103,32 @@ void DynamicShapePartitioner::PruneUniqueClusters() { if (unique_clusters_.count(cluster) != 0) { continue; } - unique_clusters_.insert(cluster); + if (unique_clusters_.insert(cluster).second) { + sorted_unique_clusters_.emplace_back(cluster); + } } + auto comp_func = [](std::shared_ptr clu_a, std::shared_ptr clu_b) -> bool { + return clu_a->Id() < clu_b->Id(); + }; + std::sort(sorted_unique_clusters_.begin(), sorted_unique_clusters_.end(), comp_func); } Status DynamicShapePartitioner::BuildPartitionFrame() { - for (const auto &cluster : unique_clusters_) { + for (const auto &cluster : sorted_unique_clusters_) { REQUIRE_SUCCESS(cluster->BuildFrame(), "Failed build frame of cluster[%lu].", cluster->Id()); } return SUCCESS; } Status DynamicShapePartitioner::CombinePartitionFrame() { - for (const auto &cluster : unique_clusters_) { + for (const 
auto &cluster : sorted_unique_clusters_) { REQUIRE_SUCCESS(cluster->CombinePartitionFrame(), "Failed combine frame of cluster[%lu].", cluster->Id()); } return SUCCESS; } Status DynamicShapePartitioner::BuildPartitionSubgraph() { - for (const auto &cluster : unique_clusters_) { + for (const auto &cluster : sorted_unique_clusters_) { REQUIRE_SUCCESS(cluster->BuildPartitionSubgraph(), "Failed build subgraph of cluster[%lu].", cluster->Id()); } return SUCCESS; @@ -171,6 +177,7 @@ void DynamicShapePartitioner::ClearResource() { node_2_cluster_.clear(); ordered_cluster_.clear(); unique_clusters_.clear(); + sorted_unique_clusters_.clear(); unknown_shape_nodes_.clear(); root_graph_.reset(); } @@ -220,8 +227,8 @@ Status DynamicShapePartitioner::TopologicalSortClusters() { std::queue ready_clusters; std::unordered_map cluster_pending_count; std::unordered_set seen_clusters; - for (auto &iter : node_2_cluster_) { - auto cluster = iter.second; + for (auto &node : root_graph_->GetDirectNode()) { + auto &cluster = node_2_cluster_[node]; if (seen_clusters.count(cluster) != 0) { continue; } @@ -756,6 +763,9 @@ Status Cluster::BuildPartitionSubgraph() { REQUIRE_GRAPH_SUCCESS(data_op->AddOutputDesc(input_desc), "Failed add output desc."); REQUIRE(AttrUtils::SetInt(data_op, ATTR_NAME_PARENT_NODE_INDEX, parent_node_index), "Failed set parent_node_index on subgraph data node."); + bool is_unknown_shape = IsUnknownShape(); + REQUIRE(AttrUtils::SetBool(data_op, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape), + "Failed set _is_unknown_shape flag on data op %s.", data_op->GetName().c_str()); auto data_node = subgraph_->AddNode(data_op); REQUIRE_NOT_NULL(data_node, "Failed add data node to subgraph."); REQUIRE_GRAPH_SUCCESS(data_node->SetOwnerComputeGraph(subgraph_), "Failed set owner graph of data node."); @@ -769,6 +779,9 @@ Status Cluster::BuildPartitionSubgraph() { } auto net_output_op = MakeShared(subgraph_->GetName() + "_" + NODE_NAME_NET_OUTPUT, ge::NETOUTPUT); 
REQUIRE_NOT_NULL(net_output_op, "Failed new memory for netoutput op."); + bool is_unknown_shape = IsUnknownShape(); + REQUIRE(AttrUtils::SetBool(net_output_op, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape), + "Failed set _is_unknown_shape flag on net_output_op %s.", net_output_op->GetName().c_str()); for (size_t i = 0; i < outputs_.size(); ++i) { GeTensorDesc input_desc; REQUIRE_GRAPH_SUCCESS(net_output_op->AddInputDesc(input_desc), "Failed add input desc."); @@ -812,4 +825,4 @@ void Cluster::Clear() { } size_t Cluster::unique_id_ = 0; -} // namespace ge \ No newline at end of file +} // namespace ge diff --git a/src/ge/graph/partition/dynamic_shape_partition.h b/src/ge/graph/partition/dynamic_shape_partition.h index 4cbd20b7..ba349b1c 100644 --- a/src/ge/graph/partition/dynamic_shape_partition.h +++ b/src/ge/graph/partition/dynamic_shape_partition.h @@ -150,6 +150,8 @@ class DynamicShapePartitioner { std::vector> ordered_cluster_; // Unique clusters left after merged clusters std::unordered_set> unique_clusters_; + // Unique clusters left after merged clusters sorted by rank + std::vector> sorted_unique_clusters_; // Nodes of root_graph_ that satisfy the unknowshape rules std::unordered_set unknown_shape_nodes_; }; diff --git a/src/ge/graph/partition/graph_partition.cc b/src/ge/graph/partition/graph_partition.cc index 907d672d..15f298c0 100644 --- a/src/ge/graph/partition/graph_partition.cc +++ b/src/ge/graph/partition/graph_partition.cc @@ -315,6 +315,11 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr GE_IF_BOOL_EXEC(!AttrUtils::SetStr(end_op_desc, "parentOpType", dst_node->GetType()), GELOGW("SetStr parentOpType failed");) GE_IF_BOOL_EXEC(!end_op_desc->SetExtAttr("parentNode", dst_node), GELOGW("SetEndExtAttr parentNode failed");) + OpDescPtr dst_node_op_desc = dst_node->GetOpDesc(); + GE_CHECK_NOTNULL(dst_node_op_desc); + GE_IF_BOOL_EXEC( + !AttrUtils::SetStr(end_op_desc, ATTR_NAME_END_REAR_NODE_ENGINE_NAME, 
dst_node_op_desc->GetOpEngineName()), + GELOGW("SetStr rearNodeEngineName failed");) // replace input_desc of end with owner node's desc int output_index = ge::AnchorUtils::GetIdx(out_anchor); bool is_need_update_desc = (output_index >= 0) && (graph_info_.mode_ == kPartitioning); @@ -364,6 +369,11 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr GE_IF_BOOL_EXEC(!AttrUtils::SetInt(pld_op_desc, "anchorIndex", AnchorUtils::GetIdx(out_anchor)), GELOGW("SetInt anchorIndex failed");) GE_IF_BOOL_EXEC(!pld_op_desc->SetExtAttr("parentNode", src_node), GELOGW("SetPldExtAttr parentNode failed");) + OpDescPtr src_node_op_desc = src_node->GetOpDesc(); + GE_CHECK_NOTNULL(src_node_op_desc); + GE_IF_BOOL_EXEC( + !AttrUtils::SetStr(pld_op_desc, ATTR_NAME_PLD_FRONT_NODE_ENGINE_NAME, src_node_op_desc->GetOpEngineName()), + GELOGW("SetStr frontNodeEngineName failed");) // do not care over flow graph_info_.num_of_pld_end_++; // replace output_desc of pld with input node's output desc diff --git a/src/ge/graph/passes/atomic_addr_clean_pass.cc b/src/ge/graph/passes/atomic_addr_clean_pass.cc index ae69fd93..2c7fb9bb 100644 --- a/src/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/src/ge/graph/passes/atomic_addr_clean_pass.cc @@ -24,46 +24,19 @@ #include "common/ge_inner_error_codes.h" #include "common/ge/ge_util.h" +#include "graph/common/ge_call_wrapper.h" #include "graph/debug/ge_attr_define.h" #include "graph/utils/node_utils.h" #include "init/gelib.h" namespace ge { -namespace { -bool GraphShouldBeSkip(const ge::ComputeGraphPtr &graph) { - // Internal function, guaranteeing graph non-null - if (graph->GetParentGraph() == nullptr) { - return false; - } - return GraphUtils::IsUnknownShapeGraph(graph); -} -} // namespace - Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { - if (graph == nullptr) { - GELOGE(PARAM_INVALID, "param [graph] must not be null."); - return PARAM_INVALID; - } - if (GraphShouldBeSkip(graph)) { - return SUCCESS; - } + 
GE_CHECK_NOTNULL(graph); GELOGD("AtomicAddrCleanPass begin."); // 1.Recoginze atomic and loop mark vector atomic_node_vec; for (NodePtr &node : graph->GetDirectNode()) { if (IsAtomicOp(node)) { - bool is_unknown = false; - auto ret_status = NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown); - if (ret_status != GRAPH_SUCCESS) { - GELOGW("Get node unknown status failed, node name:%s, type:%s.", node->GetName().c_str(), - node->GetType().c_str()); - continue; - } - if (is_unknown) { - GELOGI("Current node %s, type %s is unknown shape which should be skip.", node->GetName().c_str(), - node->GetType().c_str()); - continue; - } atomic_node_vec.push_back(node); } if (!is_loop_graph_ && node->GetType() == LOOPCOND) { @@ -76,6 +49,14 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { GELOGI("There is no atomic node. Ignore atomicAddrClean pass."); return SUCCESS; } + + bool is_known_graph = graph->GetGraphUnknownFlag(); + if (is_known_graph) { + GELOGD("Graph[%s] is unknown graph. It will call fe interface to compile op.", graph->GetName().c_str()); + GE_CHK_STATUS_RET(CompileUnknownGraphOp(atomic_node_vec)); + return SUCCESS; + } + // 2.Insert clean node and link to atomic node Status ret; if (is_loop_graph_) { @@ -123,15 +104,28 @@ Status AtomicAddrCleanPass::HandleLoopGraph(ComputeGraphPtr &graph, const vector } Status AtomicAddrCleanPass::HandleNormalGraph(ComputeGraphPtr &graph, const vector &atomic_node_vec) { - GELOGD("Not loop graph. It will insert only 1 clean node."); + GELOGD("Not loop graph and unknown graph. 
It will insert only 1 clean node."); + + vector common_atomic_nodes; + auto ret = HandleDispersedAtomicNodes(graph, atomic_node_vec, common_atomic_nodes); + if (ret != SUCCESS) { + GELOGE(ret, "Handle dispersed atomic nodes failed, graph name is %s.", graph->GetName().c_str()); + return ret; + } + + if (common_atomic_nodes.empty()) { + GELOGI("common_atomic_nodes is empty"); + return SUCCESS; + } + // not loop graph , insert only one clean node in graph NodePtr clean_addr_node = InsertAtomicAddrCleanNode(graph); if (clean_addr_node == nullptr) { GELOGE(FAILED, "Insert AtomicAddrClean node failed. Ignore atomicAddrClean pass."); return FAILED; } - for (const auto &node : atomic_node_vec) { - auto ret = LinkToAtomicNode(node, clean_addr_node); + for (const auto &node : common_atomic_nodes) { + ret = LinkToAtomicNode(node, clean_addr_node); if (ret != SUCCESS) { GELOGE(ret, "Link control anchor failed from atomic node to atomic_addr_clean node."); return ret; @@ -143,7 +137,7 @@ Status AtomicAddrCleanPass::HandleNormalGraph(ComputeGraphPtr &graph, const vect for (auto &in_anchor : node->GetAllInDataAnchors()) { GE_CHECK_NOTNULL(in_anchor->GetPeerOutAnchor()); NodePtr peer_in_node = in_anchor->GetPeerOutAnchor()->GetOwnerNode(); - Status ret = LinkToAtomicNode(peer_in_node, clean_addr_node); + ret = LinkToAtomicNode(peer_in_node, clean_addr_node); if (ret != SUCCESS) { GELOGE(ret, "Link failed, %s : %s", peer_in_node->GetName().c_str(), clean_addr_node->GetName().c_str()); return ret; @@ -153,6 +147,44 @@ Status AtomicAddrCleanPass::HandleNormalGraph(ComputeGraphPtr &graph, const vect return SUCCESS; } +Status AtomicAddrCleanPass::HandleDispersedAtomicNodes(ComputeGraphPtr &graph, + const std::vector &atomic_node_vec, + std::vector &common_atomic_nodes) { + int index = 0; + for (const auto &node : atomic_node_vec) { + vector node_anchors_connect_netoutput; + // If GetBool fail, attr is_connect_netoutput is an empty vector. 
+ (void)ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_NODE_CONNECT_OUTPUT, node_anchors_connect_netoutput); + if (!node_anchors_connect_netoutput.empty()) { + NodePtr dispersed_clean_addr_node = InsertAtomicAddrCleanNode(graph); + if (dispersed_clean_addr_node == nullptr) { + GELOGE(FAILED, "Insert AtomicAddrClean node failed. Ignore atomicAddrClean pass."); + return FAILED; + } + + auto dispersed_node_op_desc = dispersed_clean_addr_node->GetOpDesc(); + GE_CHECK_NOTNULL(dispersed_node_op_desc); + string node_name = dispersed_node_op_desc->GetName(); + std::ostringstream oss; + oss << node_name << "_" << index; + node_name = oss.str(); + dispersed_node_op_desc->SetName(node_name); + GELOGD("Inserted dispersed atomic clean node name is %s", node_name.c_str()); + ++index; + Status ret = LinkToAtomicNode(node, dispersed_clean_addr_node); + if (ret != SUCCESS) { + GELOGE(ret, "Link control anchor failed from atomic node: %s to atomic_addr_clean node: %s.", + node->GetName().c_str(), dispersed_clean_addr_node->GetName().c_str()); + return ret; + } + } else { + common_atomic_nodes.emplace_back(node); + } + } + + return SUCCESS; +} + NodePtr AtomicAddrCleanPass::InsertAtomicAddrCleanNode(ComputeGraphPtr &graph) { OpDescPtr op_desc = MakeShared(NODE_NAME_ATOMIC_ADDR_CLEAN, ATOMICADDRCLEAN); if (op_desc == nullptr) { @@ -265,4 +297,49 @@ Status AtomicAddrCleanPass::ClearStatus() { hcom_node_vec_.clear(); return SUCCESS; } + +Status AtomicAddrCleanPass::CompileUnknownGraphOp(const vector &atomic_node_vec) { + GE_TIMESTAMP_CALLNUM_START(UnknownGraphCompileOp); + std::unordered_map> node_vector_map; + std::shared_ptr instance = ge::GELib::GetInstance(); + if ((instance == nullptr) || !instance->InitFlag()) { + GELOGE(ge::GE_CLI_GE_NOT_INITIALIZED, "CompileSingleOp failed."); + return ge::GE_CLI_GE_NOT_INITIALIZED; + } + + for (auto &atomic_node : atomic_node_vec) { + auto op_desc = atomic_node->GetOpDesc(); + if (op_desc == nullptr) { + GELOGW("op desc is nullptr."); + 
continue; + } + string kernel_lib_name = op_desc->GetOpKernelLibName(); + if (kernel_lib_name.empty()) { + GELOGE(ge::INTERNAL_ERROR, "Get atomic node:%s(%s) kernel lib failed.", atomic_node->GetName().c_str(), + atomic_node->GetType().c_str()); + return ge::INTERNAL_ERROR; + } + + OpsKernelInfoStorePtr kernel_info = instance->OpsKernelManagerObj().GetOpsKernelInfoStore(kernel_lib_name); + GE_CHECK_NOTNULL(kernel_info); + node_vector_map[kernel_lib_name].emplace_back(atomic_node); + } + + for (auto &it : node_vector_map) { + auto &kernel_lib_name = it.first; + auto &node_vector = it.second; + OpsKernelInfoStorePtr kernel_info = instance->OpsKernelManagerObj().GetOpsKernelInfoStore(kernel_lib_name); + GE_CHECK_NOTNULL(kernel_info); + GE_TIMESTAMP_RESTART(UnknownGraphCompileOp); + auto ret = kernel_info->CompileOp(node_vector); + GELOGI("The atomic node size of compile op of %s is %zu", kernel_lib_name.c_str(), node_vector.size()); + GE_TIMESTAMP_ADD(UnknownGraphCompileOp); + if (ret != ge::SUCCESS) { + GELOGE(ret, "Compile atomic op failed, kernel lib name is %s", kernel_lib_name.c_str()); + return ret; + } + } + GE_TIMESTAMP_CALLNUM_END(UnknownGraphCompileOp, "AtomicAddrCleanPass::CompileUnknownGraphOp"); + return SUCCESS; +} } // namespace ge diff --git a/src/ge/graph/passes/atomic_addr_clean_pass.h b/src/ge/graph/passes/atomic_addr_clean_pass.h index 3640beef..e22c1792 100644 --- a/src/ge/graph/passes/atomic_addr_clean_pass.h +++ b/src/ge/graph/passes/atomic_addr_clean_pass.h @@ -74,6 +74,16 @@ class AtomicAddrCleanPass : public GraphPass { */ bool IsAtomicOp(const NodePtr &node); + /** + * Handle atomic node in unknown graph + * @param atomic_node_vec: atomic node vector in unknown graph + * @return + */ + Status CompileUnknownGraphOp(const vector &atomic_node_vec); + + Status HandleDispersedAtomicNodes(ComputeGraphPtr &graph, const std::vector &atomic_node_vec, + std::vector &common_atomic_nodes); + vector hcom_node_vec_; bool is_loop_graph_ = false; }; diff 
--git a/src/ge/graph/passes/attach_stream_label_pass.cc b/src/ge/graph/passes/attach_stream_label_pass.cc index 0c342d8c..9962821b 100644 --- a/src/ge/graph/passes/attach_stream_label_pass.cc +++ b/src/ge/graph/passes/attach_stream_label_pass.cc @@ -69,42 +69,14 @@ void AttachStreamLabelPass::FindNodes(const ComputeGraphPtr &graph) { for (const auto &node : stream_switch_nodes_) { for (const auto &out_ctrl_node : node->GetOutControlNodes()) { - MarkHeadNodes(out_ctrl_node, node); + GELOGD("branch_head_node %s of stream_switch %s.", out_ctrl_node->GetName().c_str(), node->GetName().c_str()); + branch_head_nodes_[out_ctrl_node] = node; } need_label_nodes_.emplace_back(node); } } /// -/// @brief Mark node as head_node of stream_switch -/// @param [in] node -/// @param [in] stream_switch -/// @return void -/// -void AttachStreamLabelPass::MarkHeadNodes(const NodePtr &node, const NodePtr &stream_switch) { - static const std::set bypass_type_set = {IDENTITY, IDENTITYN, CAST, TRANSDATA, - TRANSPOSE, TRANSPOSED, RESHAPE}; - std::stack nodes; - nodes.push(node); - std::set visited; - while (!nodes.empty()) { - NodePtr cur_node = nodes.top(); - nodes.pop(); - if (visited.count(cur_node) > 0) { - continue; - } - GELOGD("branch_head_node %s of stream_switch %s.", cur_node->GetName().c_str(), stream_switch->GetName().c_str()); - branch_head_nodes_[cur_node] = stream_switch; - if (bypass_type_set.count(cur_node->GetType()) > 0) { - for (const auto &out_node : cur_node->GetOutAllNodes()) { - nodes.push(out_node); - } - } - visited.insert(cur_node); - } -} - -/// /// @brief update cond branch /// @param [in] node /// @return Status diff --git a/src/ge/graph/passes/attach_stream_label_pass.h b/src/ge/graph/passes/attach_stream_label_pass.h index 743ce36e..fc6abd30 100644 --- a/src/ge/graph/passes/attach_stream_label_pass.h +++ b/src/ge/graph/passes/attach_stream_label_pass.h @@ -40,14 +40,6 @@ class AttachStreamLabelPass : public GraphPass { void FindNodes(const ComputeGraphPtr 
&graph); /// - /// @brief Mark node as head_node of stream_switch - /// @param [in] node - /// @param [in] stream_switch - /// @return void - /// - void MarkHeadNodes(const NodePtr &node, const NodePtr &stream_switch); - - /// /// @brief update cond branch /// @param [in] node /// @return Status diff --git a/src/ge/graph/passes/base_pass.cc b/src/ge/graph/passes/base_pass.cc index 629b08ba..4da51ab0 100644 --- a/src/ge/graph/passes/base_pass.cc +++ b/src/ge/graph/passes/base_pass.cc @@ -66,7 +66,7 @@ void AddNextIterNodes(const Node::Vistor &nodes, std::queue &n } Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unordered_set &nodes_re_pass, - std::unordered_set &nodes_deleted, std::unordered_set &nodes_seen) { + std::unordered_set &nodes_deleted, std::unordered_set &nodes_seen) { if (node == nullptr) { GELOGE(FAILED, "parameter is null."); return FAILED; @@ -106,7 +106,7 @@ Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unorder auto nodes_deleted_by_pass = name_to_pass.second->GetNodesDeleted(); nodes_deleted.insert(nodes_deleted_by_pass.begin(), nodes_deleted_by_pass.end()); - if (nodes_deleted_by_pass.count(node.get()) > 0) { + if (nodes_deleted_by_pass.count(node) > 0) { GELOGD("The node %s was deleted by pass %s, stop the remain passes", node->GetName().c_str(), name_to_pass.first.c_str()); break; @@ -153,7 +153,7 @@ Status BaseNodePass::IsolateAndDeleteNode(NodePtr &node, const std::vector return FAILED; } - AddNodeDeleted(node.get()); + AddNodeDeleted(node); return SUCCESS; } @@ -182,7 +182,7 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { GELOGD("Begin to run pass on graph, passes count %zu", names_to_passes.size()); std::queue nodes; std::unordered_set nodes_seen; - std::unordered_set nodes_deleted; + std::unordered_set nodes_deleted; std::unordered_set nodes_re_pass; std::unordered_set nodes_last; GetAllNodesNoInputEdge(graph_, nodes, nodes_seen, nodes_last); @@ -202,7 +202,7 @@ 
Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { (void)nodes_re_pass.erase(node); GE_IF_BOOL_EXEC(node == nullptr, GELOGW("node is null"); continue); - if (nodes_deleted.count(node.get()) > 0) { + if (nodes_deleted.count(node) > 0) { GELOGD("The node %s was deleted before, skip it.", node->GetName().c_str()); continue; } diff --git a/src/ge/graph/passes/base_pass.h b/src/ge/graph/passes/base_pass.h index dfba581e..6e7b292e 100644 --- a/src/ge/graph/passes/base_pass.h +++ b/src/ge/graph/passes/base_pass.h @@ -53,7 +53,7 @@ class BaseNodePass { std::unordered_set GetNodesNeedRePass() { return nodes_need_re_pass_; } - std::unordered_set GetNodesDeleted() { return nodes_deleted_; } + std::unordered_set GetNodesDeleted() { return nodes_deleted_; } void SetOption(NodePassOption option, const std::string &value) { options_[option] = value; } @@ -103,13 +103,13 @@ class BaseNodePass { /// next iterations. /// @param node /// - void AddNodeDeleted(Node *node) { nodes_deleted_.insert(node); } + void AddNodeDeleted(const NodePtr &node) { nodes_deleted_.insert(node); } bool OptionExists(NodePassOption option) { return options_.count(option) > 0; } private: std::unordered_set nodes_need_re_pass_; - std::unordered_set nodes_deleted_; + std::unordered_set nodes_deleted_; std::map options_; }; diff --git a/src/ge/graph/passes/bitcast_pass.cc b/src/ge/graph/passes/bitcast_pass.cc new file mode 100644 index 00000000..e8e1f84f --- /dev/null +++ b/src/ge/graph/passes/bitcast_pass.cc @@ -0,0 +1,148 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/passes/bitcast_pass.h" + +#include +#include +#include "common/ge/ge_util.h" +#include "graph/utils/type_utils.h" +#include "framework/common/debug/log.h" +#include "framework/common/ge_inner_error_codes.h" + +namespace ge { +namespace { +const char *const kAttrNameType = "type"; +} // namespace + +Status BitcastPass::Run(NodePtr &node) { + GELOGD("Bitcast running"); + if (node == nullptr) { + GELOGE(PARAM_INVALID, "Param [node] must not be null."); + return PARAM_INVALID; + } + + if (node->GetType() != BITCAST) { + return SUCCESS; + } + + OpDescPtr op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + return PARAM_INVALID; + } + ge::DataType dst_data_type; + if (CheckDstDataType(op_desc, dst_data_type) != SUCCESS) { + return PARAM_INVALID; + } + + if (CheckOutputShape(op_desc, dst_data_type) != SUCCESS) { + return PARAM_INVALID; + } + + return IsolateAndDeleteNode(node, {0}); +} + +Status BitcastPass::CheckDstDataType(const OpDescPtr op_desc, ge::DataType &dst_data_type) { + if (!ge::AttrUtils::GetDataType(op_desc, kAttrNameType, dst_data_type)) { + GELOGE(PARAM_INVALID, "Node failed to get attribute type."); + return PARAM_INVALID; + } + if (dst_data_type >= ge::DT_UNDEFINED) { + GELOGE(PARAM_INVALID, "dst_data_type[%s] is not valid.", TypeUtils::DataTypeToSerialString(dst_data_type).c_str()); + return PARAM_INVALID; + } + + if (op_desc->GetOutputDescPtr(0) == nullptr) { + GELOGE(PARAM_INVALID, "Bitcast node outputDesc is null."); + return PARAM_INVALID; + } + if (op_desc->GetOutputDescPtr(0)->GetDataType() != 
dst_data_type) { + GELOGE(PARAM_INVALID, "dst_data_type[%s] is not equal to output_data_type[%s].", + TypeUtils::DataTypeToSerialString(dst_data_type).c_str(), + TypeUtils::DataTypeToSerialString(op_desc->GetOutputDescPtr(0)->GetDataType()).c_str()); + return PARAM_INVALID; + } + return SUCCESS; +} + +Status BitcastPass::CheckOutputShape(const OpDescPtr op_desc, const ge::DataType dst_data_type) { + const GeTensorDescPtr &input_tensor_desc = op_desc->MutableInputDesc(0); + const GeTensorDescPtr &output_tensor_desc = op_desc->MutableOutputDesc(0); + if (input_tensor_desc == nullptr) { + GELOGE(PARAM_INVALID, "input_tensor_desc must not be null."); + return PARAM_INVALID; + } + + // get origin data_type and shape + ge::DataType ori_data_type = input_tensor_desc->GetDataType(); + if (ori_data_type >= ge::DT_UNDEFINED) { + GELOGE(PARAM_INVALID, "ori_data_type[%s] is not valid.", TypeUtils::DataTypeToSerialString(ori_data_type).c_str()); + return PARAM_INVALID; + } + + if (ori_data_type == dst_data_type) { + GELOGW("Origin data type is equal to dest data type."); + return SUCCESS; + } + + BitcastPass::kVecInt64 dim_vec(input_tensor_desc->GetShape().GetDims()); + if (CalcAndUpdateShape(dim_vec, ori_data_type, dst_data_type) != SUCCESS) { + GELOGE(PARAM_INVALID, "CalcAndUpdateShape failed."); + return PARAM_INVALID; + } + + if (dim_vec != output_tensor_desc->GetShape().GetDims()) { + GELOGE(PARAM_INVALID, "out_put_shape is different from expectations."); + return PARAM_INVALID; + } + + return SUCCESS; +} + +Status BitcastPass::CalcAndUpdateShape(BitcastPass::kVecInt64 &dim_vec, ge::DataType ori_data_type, + ge::DataType dst_data_type) { + if (dim_vec.size() == 0) { + GELOGE(PARAM_INVALID, "Pre node shape size is zero."); + return PARAM_INVALID; + } + int64_t ori_data_size = GetSizeByDataType(ori_data_type); + int64_t dst_data_size = GetSizeByDataType(dst_data_type); + + if (ori_data_size == dst_data_size) { + return SUCCESS; + } else if (ori_data_size > dst_data_size) { + 
if (ori_data_size % dst_data_size != 0) { + GELOGE(PARAM_INVALID, "ori_data_size is not divisible by dst_data_size."); + return PARAM_INVALID; + } + dim_vec.push_back(ori_data_size / dst_data_size); + return SUCCESS; + } else { + if (dst_data_size % ori_data_size != 0) { + GELOGE(PARAM_INVALID, "dst_data_size is not divisible by ori_data_size."); + return PARAM_INVALID; + } + + if (dim_vec[dim_vec.size() - 1] != (dst_data_size / ori_data_size)) { + GELOGE(PARAM_INVALID, "The last dim is not equal to dst_data_size / ori_data_size."); + return PARAM_INVALID; + } + dim_vec.pop_back(); + } + return SUCCESS; +} + +} // namespace ge diff --git a/src/ge/graph/passes/bitcast_pass.h b/src/ge/graph/passes/bitcast_pass.h new file mode 100644 index 00000000..4a9e2e1b --- /dev/null +++ b/src/ge/graph/passes/bitcast_pass.h @@ -0,0 +1,41 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_GRAPH_PASSES_BITCAST_PASS_H_ +#define GE_GRAPH_PASSES_BITCAST_PASS_H_ + +#include "common/ge_inner_error_codes.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/types.h" +#include "graph/graph.h" +#include "graph/op_desc.h" +#include "graph/passes/base_pass.h" +#include "graph/passes/pass_utils.h" + +namespace ge { +class BitcastPass : public BaseNodePass { + public: + Status Run(ge::NodePtr &node) override; + typedef std::vector kVecInt64; + + private: + Status CheckDstDataType(const OpDescPtr op_desc, ge::DataType &dst_data_type); + Status CheckOutputShape(const OpDescPtr op_desc, const ge::DataType dst_data_type); + Status CalcAndUpdateShape(BitcastPass::kVecInt64 &dim_vec, ge::DataType ori_data_type, ge::DataType dst_data_type); +}; +} // namespace ge + +#endif // GE_GRAPH_PASSES_BITCAST_PASS_H_ diff --git a/src/ge/graph/passes/cast_translate_pass.cc b/src/ge/graph/passes/cast_translate_pass.cc index 2d67b0a8..ee67e93d 100644 --- a/src/ge/graph/passes/cast_translate_pass.cc +++ b/src/ge/graph/passes/cast_translate_pass.cc @@ -264,7 +264,7 @@ Status CastTranslatePass::FuseDstNTranslates(NodePtr &node) { GELOGE(FAILED, "[%s] RemoveNodeWithoutRelink failed.", out_data_node->GetName().c_str()); return FAILED; } - AddNodeDeleted(out_data_node.get()); + AddNodeDeleted(out_data_node); } return SUCCESS; diff --git a/src/ge/graph/passes/end_of_sequence_add_control_pass.cc b/src/ge/graph/passes/end_of_sequence_add_control_pass.cc new file mode 100644 index 00000000..a3928835 --- /dev/null +++ b/src/ge/graph/passes/end_of_sequence_add_control_pass.cc @@ -0,0 +1,139 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/passes/end_of_sequence_add_control_pass.h" + +#include +#include + +#include "init/gelib.h" +#include "graph/node.h" + +namespace ge { + +Status EndOfSequenceAddControlPass::Run(ComputeGraphPtr graph) { + if (graph == nullptr) { + GELOGE(PARAM_INVALID, "param [graph] must not be null."); + return PARAM_INVALID; + } + if (graph->GetParentGraph() != nullptr) { + return SUCCESS; + } + NodePtr end_of_sequence = GetEndOfSequence(graph); + if (end_of_sequence == nullptr) { + return SUCCESS; + } + GELOGI("EndOfSequenceAddControlPass begin."); + + std::vector target_nodes; + for (NodePtr &node : graph->GetDirectNode()) { + if (node == nullptr) { + GELOGW("node is nullptr."); + continue; + } + string stream_label; + (void)AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label); + if (!stream_label.empty() || IsDataLikeNode(node)) { + continue; + } + // Save the nodes whose pre-nodes are all data-like node + auto in_data_nodes = node->GetInDataNodes(); + bool flag = false; + for (auto in_node : in_data_nodes) { + if (!IsDataLikeNode(in_node)) { + flag = true; + break; + } + } + if (flag) { + continue; + } + target_nodes.push_back(node); + } + // Insert control edge + Status status = AddControlEdge(end_of_sequence, target_nodes); + if (status != SUCCESS) { + GELOGE(FAILED, "Graph add EndOfSequence op out ctrl edge fail."); + return FAILED; + } + GELOGI("EndOfSequenceAddControlPass end."); + return SUCCESS; +} + +Status EndOfSequenceAddControlPass::AddControlEdge(NodePtr &end_of_sequence, std::vector &target_nodes) { 
+ auto out_ctrl_anchor = end_of_sequence->GetOutControlAnchor(); + for (NodePtr &node : target_nodes) { + auto in_ctrl_anchor = node->GetInControlAnchor(); + if (in_ctrl_anchor == nullptr) { + continue; + } + Status status = GraphUtils::AddEdge(out_ctrl_anchor, in_ctrl_anchor); + if (status != GRAPH_SUCCESS) { + GELOGE(FAILED, "Graph add EndOfSequence op out ctrl edge fail, dst node: %s.", node->GetName().c_str()); + return FAILED; + } + GELOGI("Graph add EndOfSequence op out ctrl edge, dst node: %s.", node->GetName().c_str()); + } + return SUCCESS; +} + +inline NodePtr EndOfSequenceAddControlPass::GetEndOfSequence(const ComputeGraphPtr &graph) const { + // Internal function, guaranteeing graph non-null + for (NodePtr &node : graph->GetDirectNode()) { + if (node->GetType() == ENDOFSEQUENCE) { + return node; + } + } + return nullptr; +} + +bool EndOfSequenceAddControlPass::IsDataLikeNode(const NodePtr &node) { + std::shared_ptr instance_ptr = GELib::GetInstance(); + if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { + GELOGW("GELib not initialized"); + return false; + } + OpDescPtr op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + return false; + } + string engine_name = op_desc->GetOpEngineName(); + if (engine_name.empty()) { + engine_name = instance_ptr->DNNEngineManagerObj().GetDNNEngineName(node->GetOpDesc()); + } + const map schedulers = instance_ptr->DNNEngineManagerObj().GetSchedulers(); + // Only one scheduler has been supported by now + for (auto schedulers_iter = schedulers.begin(); schedulers_iter != schedulers.end(); ++schedulers_iter) { + const map cal_engines = schedulers_iter->second.cal_engines; + auto cal_engines_iter = cal_engines.find(engine_name); + if (cal_engines_iter == cal_engines.end()) { + GELOGW("No cal_engines found within engine %s, node name %s", engine_name.c_str(), node->GetName().c_str()); + continue; + } + EngineConfPtr engine_conf_ptr = cal_engines_iter->second; + if (engine_conf_ptr == nullptr) { + 
GELOGW("engine_conf_ptr within engine %s, node name %s is null", engine_name.c_str(), node->GetName().c_str()); + continue; + } + bool skip_assign_stream = engine_conf_ptr->skip_assign_stream; + if (skip_assign_stream) { + return true; + } + return false; + } + return false; +} +} // namespace ge diff --git a/src/ge/graph/passes/end_of_sequence_add_control_pass.h b/src/ge/graph/passes/end_of_sequence_add_control_pass.h new file mode 100644 index 00000000..2540a988 --- /dev/null +++ b/src/ge/graph/passes/end_of_sequence_add_control_pass.h @@ -0,0 +1,56 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_PASSES_END_OF_SEQUENCE_ADD_CONTROL_EDGE_PASS_H_ +#define GE_GRAPH_PASSES_END_OF_SEQUENCE_ADD_CONTROL_EDGE_PASS_H_ + +#include "graph/graph.h" +#include "inc/graph_pass.h" + +namespace ge { +class EndOfSequenceAddControlPass : public GraphPass { + public: + EndOfSequenceAddControlPass() {} + EndOfSequenceAddControlPass(const EndOfSequenceAddControlPass &eos_pass) = delete; + EndOfSequenceAddControlPass &operator=(const EndOfSequenceAddControlPass &eos_pass) = delete; + + ~EndOfSequenceAddControlPass() override {} + + Status Run(ComputeGraphPtr graph) override; + + private: + /** + * Get EndOfSequence node in graph, nullptr if not exist. 
+ * @param graph + * @return EndOfSequence node + */ + inline NodePtr GetEndOfSequence(const ComputeGraphPtr &graph) const; + /** + * Check whether this node is a data-like node. + * @param node + * @return + */ + bool IsDataLikeNode(const NodePtr &node); + /** + * Check whether this node is a data-like node. + * @param node + * @return + */ + Status AddControlEdge(NodePtr &end_of_sequence, std::vector &target_nodes); +}; +} // namespace ge + +#endif // GE_GRAPH_PASSES_END_OF_SEQUENCE_ADD_CONTROL_EDGE_PASS_H_ diff --git a/src/ge/graph/passes/folding_pass.cc b/src/ge/graph/passes/folding_pass.cc index 4e51f1ca..44dbc182 100644 --- a/src/ge/graph/passes/folding_pass.cc +++ b/src/ge/graph/passes/folding_pass.cc @@ -291,7 +291,7 @@ Status FoldingPass::RemoveNodeKeepingCtrlEdges(NodePtr &node) { GELOGE(INTERNAL_ERROR, "Failed to remove node %s from graph", node->GetName().c_str()); return INTERNAL_ERROR; } - AddNodeDeleted(node.get()); + AddNodeDeleted(node); return SUCCESS; } diff --git a/src/ge/graph/passes/input_output_connection_identify_pass.cc b/src/ge/graph/passes/input_output_connection_identify_pass.cc new file mode 100644 index 00000000..45560bf5 --- /dev/null +++ b/src/ge/graph/passes/input_output_connection_identify_pass.cc @@ -0,0 +1,193 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/passes/input_output_connection_identify_pass.h" + +#include +#include +#include +#include +#include + +#include "common/ge/ge_util.h" +#include "common/ge_inner_error_codes.h" +#include "framework/common/debug/ge_log.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" + +using std::map; +using std::string; +using std::vector; + +namespace ge { +namespace { +inline bool IsDataOp(const std::string &node_type) { + return (node_type == DATA_TYPE) || (node_type == AIPP_DATA_TYPE) || (node_type == ANN_DATA_TYPE); +} +} // namespace + +Status InputOutputConnectionIdentifyPass::Run(ComputeGraphPtr graph) { + if (graph == nullptr) { + GELOGE(PARAM_INVALID, "Input param graph is null, skip identification of nodes that connect to input and output."); + return PARAM_INVALID; + } + + if (graph->GetParentGraph() != nullptr) { + GELOGD("Current graph %s is a subgraph, skip identification of nodes that connect to input and output.", + graph->GetName().c_str()); + return SUCCESS; + } + + GELOGD("Start to identify nodes that connect to input and output."); + if (graph->TopologicalSorting() != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Graph topological sort failed."); + return INTERNAL_ERROR; + } + + if (GraphUtils::GetRefMapping(graph, symbol_to_anchors_, anchor_to_symbol_) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Get ref-mapping for graph %s failed.", graph->GetName().c_str()); + return INTERNAL_ERROR; + } + + map> connect_input_node_idx_map; + map> connect_output_node_idx_map; + Status status = SUCCESS; + for (const NodePtr &node : graph->GetDirectNode()) { + // Not only node type Data is determined. 
+ if (IsDataOp(node->GetType())) { + GELOGD("Find nodes that connect to root graph input node: %s.", node->GetName().c_str()); + status = ProcessInputNode(node, connect_input_node_idx_map, connect_output_node_idx_map); + if (status != SUCCESS) { + GELOGE(status, "Failed to process nodes that connect to input node: %s.", node->GetName().c_str()); + return status; + } + } + + if (node->GetType() == NETOUTPUT) { + GELOGD("Find nodes that connect to root graph output node: %s.", node->GetName().c_str()); + status = ProcessOutputNode(node, connect_input_node_idx_map, connect_output_node_idx_map); + if (status != SUCCESS) { + GELOGE(status, "Failed to process nodes that connect to output node: %s.", node->GetName().c_str()); + return status; + } + } + } + + status = SetNodeAttrOfConnectingInputOutput(connect_input_node_idx_map, connect_output_node_idx_map); + if (status != SUCCESS) { + GELOGE(status, "Failed to set attr for nodes that connect to input and output."); + return status; + } + + GELOGD("Success to identify nodes that connect to input and output."); + return SUCCESS; +} + +Status InputOutputConnectionIdentifyPass::ProcessInputNode(const NodePtr &node, + map> &connect_input_node_idx, + map> &connect_output_node_idx) { + GE_CHECK_NOTNULL(node); + for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) { + // The return ptr of GetAllOutDataAnchors is always valid. 
+ auto anchor_iter = anchor_to_symbol_.find(NodeIndexIO(node, out_data_anchor->GetIdx(), kOut).ToString()); + if (anchor_iter == anchor_to_symbol_.end()) { + GELOGW("Current node: %s out_data_anchor: %d is invalid, can not find related symbol.", node->GetName().c_str(), + out_data_anchor->GetIdx()); + continue; + } + + const string &symbol = anchor_iter->second; + auto status = UpdateNodeIdxMap(symbol, connect_input_node_idx, connect_output_node_idx); + if (status != SUCCESS) { + GELOGE(status, "Failed to update node anchor_index map."); + return status; + } + } + return SUCCESS; +} + +Status InputOutputConnectionIdentifyPass::UpdateNodeIdxMap(const string &symbol_string, + map> &connect_input_node_idx, + map> &connect_output_node_idx) { + auto symbol_iter = symbol_to_anchors_.find(symbol_string); + if (symbol_iter == symbol_to_anchors_.end()) { + GELOGE(PARAM_INVALID, "Input param symbol string: %s is invalid.", symbol_string.c_str()); + return PARAM_INVALID; + } + const auto &node_index_io_list = symbol_iter->second; + for (const auto &node_index_io : node_index_io_list) { + if (node_index_io.io_type_ == kOut) { + // find node that has shared output memory. + connect_output_node_idx[node_index_io.node_].emplace_back(node_index_io.index_); + } else { + // find node that has shared input memory. + connect_input_node_idx[node_index_io.node_].emplace_back(node_index_io.index_); + } + } + return SUCCESS; +} + +Status InputOutputConnectionIdentifyPass::ProcessOutputNode(const NodePtr &node, + map> &connect_input_node_idx, + map> &connect_output_node_idx) { + GE_CHECK_NOTNULL(node); + for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { + // The return ptr of GetAllInDataAnchors is always valid. 
+ auto anchor_iter = anchor_to_symbol_.find(NodeIndexIO(node, in_data_anchor->GetIdx(), kIn).ToString()); + if (anchor_iter == anchor_to_symbol_.end()) { + GELOGW("Current node: %s in_data_anchor: %d is invalid, can not find related symbol.", node->GetName().c_str(), + in_data_anchor->GetIdx()); + continue; + } + + const string &symbol = anchor_iter->second; + auto status = UpdateNodeIdxMap(symbol, connect_input_node_idx, connect_output_node_idx); + if (status != SUCCESS) { + GELOGE(status, "Failed to update node anchor_index map."); + return status; + } + } + return SUCCESS; +} + +Status InputOutputConnectionIdentifyPass::SetNodeAttrOfConnectingInputOutput( + const map> &connect_input_node_idx, + const map> &connect_output_node_idx) { + for (const auto &iter : connect_input_node_idx) { + GE_CHECK_NOTNULL(iter.first); + if (iter.first->GetOpDesc() != nullptr) { + if (!AttrUtils::SetListInt(iter.first->GetOpDesc(), ATTR_NAME_NODE_CONNECT_INPUT, iter.second)) { + GELOGE(INTERNAL_ERROR, "Failed to set attr %s for node %s.", ATTR_NAME_NODE_CONNECT_INPUT.c_str(), + iter.first->GetName().c_str()); + return INTERNAL_ERROR; + } + } + } + + for (const auto &iter : connect_output_node_idx) { + GE_CHECK_NOTNULL(iter.first); + if (iter.first->GetOpDesc() != nullptr) { + if (!AttrUtils::SetListInt(iter.first->GetOpDesc(), ATTR_NAME_NODE_CONNECT_OUTPUT, iter.second)) { + GELOGE(INTERNAL_ERROR, "Failed to set attr %s for node %s.", ATTR_NAME_NODE_CONNECT_OUTPUT.c_str(), + iter.first->GetName().c_str()); + return INTERNAL_ERROR; + } + } + } + return SUCCESS; +} +} // namespace ge diff --git a/src/ge/graph/passes/input_output_connection_identify_pass.h b/src/ge/graph/passes/input_output_connection_identify_pass.h new file mode 100644 index 00000000..0dd32102 --- /dev/null +++ b/src/ge/graph/passes/input_output_connection_identify_pass.h @@ -0,0 +1,75 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); 
+ * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_PASSES_INPUT_OUTPUT_CONNECTION_IDENTIFY_PASS_H_ +#define GE_GRAPH_PASSES_INPUT_OUTPUT_CONNECTION_IDENTIFY_PASS_H_ + +#include +#include +#include "graph/graph.h" +#include "inc/graph_pass.h" + +namespace ge { +class InputOutputConnectionIdentifyPass : public GraphPass { + public: + Status Run(ComputeGraphPtr graph) override; + + private: + /// + /// Find all nodes that connect to input node. + /// @param [in] input node + /// @param [out] map of nodes and anchor index that connect to input + /// @param [out] map of nodes and anchor index that connect to output + /// @return Status + /// + Status ProcessInputNode(const NodePtr &node, std::map> &connect_input_node_idx, + std::map> &connect_output_node_idx); + + /// + /// Find all nodes that connect to output node. + /// @param [in] output node + /// @param [out] map of nodes and anchor index that connect to input + /// @param [out] map of nodes and anchor index that connect to output + /// @return Status + /// + Status ProcessOutputNode(const NodePtr &node, std::map> &connect_input_node_idx, + std::map> &connect_output_node_idx); + + /// + /// Update all nodes that have shared memory. 
+ /// @param [in] symbol string + /// @param [out] map of nodes and anchor index that connect to input + /// @param [out] map of nodes and anchor index that connect to output + /// @return Status + /// + Status UpdateNodeIdxMap(const string &symbol_string, std::map> &connect_input_node_idx, + std::map> &connect_output_node_idx); + + /// + /// Set attr for all nodes that connect to input and output. + /// @param [in] map of nodes and anchor index that connect to input + /// @param [in] map of nodes and anchor index that connect to output + /// @return Status + /// + Status SetNodeAttrOfConnectingInputOutput(const std::map> &connect_input_node_idx, + const std::map> &connect_output_node_idx); + + // Members for ref mapping + std::map> symbol_to_anchors_; + std::map anchor_to_symbol_; +}; +} // namespace ge +#endif // GE_GRAPH_PASSES_INPUT_OUTPUT_CONNECTION_IDENTIFY_PASS_H_ \ No newline at end of file diff --git a/src/ge/graph/passes/mark_graph_unknown_status_pass.cc b/src/ge/graph/passes/mark_graph_unknown_status_pass.cc new file mode 100644 index 00000000..7106e58c --- /dev/null +++ b/src/ge/graph/passes/mark_graph_unknown_status_pass.cc @@ -0,0 +1,35 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/passes/mark_graph_unknown_status_pass.h" +#include "graph/utils/node_utils.h" + +namespace ge { +Status MarkGraphUnknownStatusPass::Run(ComputeGraphPtr graph) { + GE_CHECK_NOTNULL(graph); + bool is_unknown_shape = false; + for (const auto &node : graph->GetDirectNode()) { + GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown_shape), + "Get node[%s] shape status failed!", node->GetName().c_str()); + if (is_unknown_shape) { + break; + } + } + graph->SetGraphUnknownFlag(is_unknown_shape); + GELOGD("mark graph [%s] unknown status success! value is %d", graph->GetName().c_str(), is_unknown_shape); + return SUCCESS; +} +} // namespace ge \ No newline at end of file diff --git a/src/ge/graph/passes/mark_graph_unknown_status_pass.h b/src/ge/graph/passes/mark_graph_unknown_status_pass.h new file mode 100644 index 00000000..662e321c --- /dev/null +++ b/src/ge/graph/passes/mark_graph_unknown_status_pass.h @@ -0,0 +1,28 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_GRAPH_PASSES_MARK_GRAPH_UNKNOWN_STATUS_PASS_H_ +#define GE_GRAPH_PASSES_MARK_GRAPH_UNKNOWN_STATUS_PASS_H_ +#include "graph/graph.h" +#include "inc/graph_pass.h" + +namespace ge { +class MarkGraphUnknownStatusPass : public GraphPass { + public: + Status Run(ComputeGraphPtr graph); +}; +} // namespace ge +#endif // GE_GRAPH_PASSES_MARK_GRAPH_UNKNOWN_STATUS_PASS_H_ diff --git a/src/ge/graph/passes/mark_same_addr_pass.cc b/src/ge/graph/passes/mark_same_addr_pass.cc index 06d63393..0ed151d3 100644 --- a/src/ge/graph/passes/mark_same_addr_pass.cc +++ b/src/ge/graph/passes/mark_same_addr_pass.cc @@ -44,23 +44,8 @@ bool MarkSameAddrPass::IsNextNodeExpected(const ge::NodePtr &cur_node, const vec Status MarkSameAddrPass::Run(ComputeGraphPtr graph) { GELOGD("MarkSameAddrPass begin."); GE_CHECK_NOTNULL(graph); - auto parent_node = graph->GetParentNode(); - if (parent_node == nullptr) { - return SUCCESS; - } - auto parent_op_desc = parent_node->GetOpDesc(); - GE_CHECK_NOTNULL(parent_op_desc); - if (!parent_op_desc->HasAttr(ATTR_NAME_IS_UNKNOWN_SHAPE)) { - GELOGD("Graph[%s] do not have unknown shape attr. Parent node is %s", graph->GetName().c_str(), - parent_op_desc->GetName().c_str()); - return SUCCESS; - } - - bool is_unknown_shape = false; - (void)AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape); - if (is_unknown_shape) { - GELOGD("Graph[%s] is unknown shape, do not need to set fixed addr attr. 
Parent node is %s", - graph->GetName().c_str(), parent_op_desc->GetName().c_str()); + if (graph->GetGraphUnknownFlag()) { + GELOGD("Graph[%s] is unknown shape, do not need to set fixed addr attr.", graph->GetName().c_str()); return SUCCESS; } diff --git a/src/ge/graph/passes/memcpy_addr_async_pass.cc b/src/ge/graph/passes/memcpy_addr_async_pass.cc new file mode 100644 index 00000000..7cbacc23 --- /dev/null +++ b/src/ge/graph/passes/memcpy_addr_async_pass.cc @@ -0,0 +1,245 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/passes/memcpy_addr_async_pass.h" + +#include "common/ge/ge_util.h" +#include "framework/common/debug/log.h" +#include "graph/utils/node_utils.h" + +namespace ge { +Status MemcpyAddrAsyncPass::Run(ComputeGraphPtr graph) { + GE_CHECK_NOTNULL(graph); + for (auto &node : graph->GetAllNodes()) { + auto op_desc = node->GetOpDesc(); + GE_IF_BOOL_EXEC(op_desc == nullptr, continue); + + if (op_desc->GetType() == STREAMSWITCHN || op_desc->GetType() == STREAMMERGE) { + Status ret = AddMemcpyAddrAsyncNode(graph, node); + if (ret != SUCCESS) { + GELOGE(ret, "AddMemcpyAddrAsyncNode failed."); + return ret; + } + } + } + return SUCCESS; +} + +Status MemcpyAddrAsyncPass::AddMemcpyAddrAsyncNode(const ComputeGraphPtr &graph, const NodePtr &node) { + GELOGI("Start AddMemcpyAddrAsyncNode for %s.", node->GetName().c_str()); + for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { + OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); + NodePtr in_node = peer_out_anchor->GetOwnerNode(); + + if (in_node->GetType() == DATA) { + ComputeGraphPtr owner_graph = in_node->GetOwnerComputeGraph(); + GE_CHECK_NOTNULL(owner_graph); + // Data is in parent_graph + if (owner_graph->GetParentGraph() == nullptr) { + GELOGI("Need to insert MemcpyAddrAsync directly when data in parent graph."); + NodePtr memcpy_addr_async_node = CreateMemcpyAddrAsyncNode(graph, peer_out_anchor, node); + GE_IF_BOOL_EXEC(memcpy_addr_async_node == nullptr, GELOGE(INTERNAL_ERROR, "CreateMemcpyAddrAsyncNode failed."); + return INTERNAL_ERROR); + + Status ret = InsertMemcpyAddrAsyncNode(peer_out_anchor, in_data_anchor, memcpy_addr_async_node); + GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "InsertMemcpyAddrAsyncNode failed."); return ret); + } else { + uint32_t parent_index = 0; + if (!AttrUtils::GetInt(in_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { + GELOGE(INTERNAL_ERROR, "Failed to get parent 
index of %s", in_node->GetName().c_str()); + return INTERNAL_ERROR; + } + // Data is in sub_graph + GELOGI("Need to find data in parent graph, then insert MemcpyAddrAsync."); + NodePtr parent_node = owner_graph->GetParentNode(); + user_data_for_known_ = in_node; + out_of_user_data_for_known_ = node; + peer_out_anchor_for_known_ = peer_out_anchor; + in_anchor_for_known_ = in_data_anchor; + FindUserData(parent_node, parent_index); + if (find_user_data_) { + GELOGI("Insert memcpy_addr_async for non_dynamic."); + GE_CHECK_NOTNULL(peer_out_anchor_); + NodePtr memcpy_addr_async_node = CreateMemcpyAddrAsyncNode(graph, peer_out_anchor_, out_of_user_data_); + GE_IF_BOOL_EXEC(memcpy_addr_async_node == nullptr, + GELOGE(INTERNAL_ERROR, "CreateMemcpyAddrAsyncNode failed."); + return INTERNAL_ERROR); + + Status ret = InsertMemcpyAddrAsyncNode(peer_out_anchor_, in_anchor_, memcpy_addr_async_node); + GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "InsertMemcpyAddrAsyncNode failed."); return ret); + } + if (find_user_data_for_known_) { + GELOGI("Insert memcpy_addr_async for known graph."); + auto sub_graph = user_data_for_known_->GetOwnerComputeGraph(); + NodePtr memcpy_addr_async_node = + CreateMemcpyAddrAsyncNode(sub_graph, peer_out_anchor_for_known_, out_of_user_data_for_known_); + GE_IF_BOOL_EXEC(memcpy_addr_async_node == nullptr, + GELOGE(INTERNAL_ERROR, "CreateMemcpyAddrAsyncNode for known failed."); + return INTERNAL_ERROR); + + Status ret = + InsertMemcpyAddrAsyncNode(peer_out_anchor_for_known_, in_anchor_for_known_, memcpy_addr_async_node); + GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "InsertMemcpyAddrAsyncNode for known failed."); return ret); + } + } + } + } + return SUCCESS; +} + +void MemcpyAddrAsyncPass::FindUserDataForKnown(const NodePtr &parent_node, uint32_t &parent_index) { + GELOGI("Start FindUserDataForKnown of %s.", parent_node->GetName().c_str()); + if (user_data_for_known_->GetOpDesc() == nullptr) { + GELOGI("Cannot get op_desc of %s.", 
user_data_for_known_->GetName().c_str()); + return; + } + string src_var_name; + if (ge::AttrUtils::GetStr(user_data_for_known_->GetOpDesc(), REF_VAR_SRC_VAR_NAME, src_var_name)) { + GELOGI("The data in known graph is variable, no need to insert memcpy_addr_async."); + find_user_data_for_known_ = false; + return; + } else { + find_user_data_for_known_ = true; + } +} + +void MemcpyAddrAsyncPass::FindUserDataForNonDynamic(const ge::NodePtr &parent_node, uint32_t &parent_index) { + GELOGI("Start to FindUserDataForNonDynamic of %s.", parent_node->GetName().c_str()); + InDataAnchorPtr in_data_anchor = parent_node->GetInDataAnchor(parent_index); + OutDataAnchorPtr out_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(out_anchor == nullptr, + GELOGE(INTERNAL_ERROR, "Cannot find out_anchor of %s.", parent_node->GetName().c_str()); + return ); + NodePtr in_node = out_anchor->GetOwnerNode(); + GELOGI("in_node of parent_node is %s.", in_node->GetName().c_str()); + if (in_node->GetType() == DATA) { + if (in_node->GetOwnerComputeGraph()->GetParentGraph() != nullptr) { + // DATA is in sub graph again, update user_data of known firstly + user_data_for_known_ = in_node; + out_of_user_data_for_known_ = parent_node; + peer_out_anchor_for_known_ = out_anchor; + in_anchor_for_known_ = in_data_anchor; + NodePtr pre_in_node = in_node->GetOwnerComputeGraph()->GetParentNode(); + if (!AttrUtils::GetInt(in_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { + GELOGE(INTERNAL_ERROR, "Failed to refresh parent index of %s", in_node->GetName().c_str()); + return; + } + FindUserData(pre_in_node, parent_index); + } else { + // DATA is in parent graph and not has input + user_data_ = in_node; + out_of_user_data_ = parent_node; + peer_out_anchor_ = out_anchor; + in_anchor_ = in_data_anchor; + find_user_data_ = true; + GELOGI("%s connect with %s, will insert memcpyaddr.", user_data_->GetName().c_str(), + out_of_user_data_->GetName().c_str()); + } + } else if 
(in_node->GetType() == IF || in_node->GetType() == WHILE || in_node->GetType() == CASE) { + if (!AttrUtils::GetInt(parent_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { + GELOGE(INTERNAL_ERROR, "Failed to refresh parent index of %s", in_node->GetName().c_str()); + return; + } + FindUserData(in_node, parent_index); + } else { + GELOGI("%s connect with %s, which is not user_data.", parent_node->GetName().c_str(), in_node->GetName().c_str()); + find_user_data_ = false; + } +} + +void MemcpyAddrAsyncPass::FindUserData(const NodePtr &parent_node, uint32_t &parent_index) { + auto parent_op_desc = parent_node->GetOpDesc(); + if (parent_op_desc == nullptr) { + GELOGI("Cannot get op_desc of %s.", parent_node->GetName().c_str()); + return; + } + bool is_unknown_shape = false; + if (parent_node->GetType() == PARTITIONEDCALL && + AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape) && !is_unknown_shape) { + FindUserDataForKnown(parent_node, parent_index); + } else { + FindUserDataForNonDynamic(parent_node, parent_index); + } +} + +NodePtr MemcpyAddrAsyncPass::CreateMemcpyAddrAsyncNode(const ComputeGraphPtr &graph, + const OutDataAnchorPtr &out_data_anchor, + const NodePtr &out_of_user_data) { + GELOGI("Start CreateMemcpyAddrAsyncNode."); + OpDescPtr pre_op_desc = out_data_anchor->GetOwnerNode()->GetOpDesc(); + GE_CHK_BOOL_EXEC(pre_op_desc != nullptr, return nullptr, "Op_desc of pre node is invalid."); + std::string node_name = pre_op_desc->GetName() + "_" + MEMCPYADDRASYNC; + + OpDescPtr op_desc = MakeShared(node_name, MEMCPYADDRASYNC); + GE_CHECK_NOTNULL_EXEC(op_desc, return nullptr); + + if (op_desc->AddInputDesc(pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add memcpy_addr_async input desc failed."); + return nullptr; + } + + if (op_desc->AddOutputDesc(pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add memcpy_addr_async 
output desc failed."); + return nullptr; + } + + int64_t stream_id = out_of_user_data->GetOpDesc()->GetStreamId(); + op_desc->SetStreamId(stream_id); + GELOGI("SetStreamId: Node %s assign stream is %ld.", op_desc->GetName().c_str(), stream_id); + bool labeled_input = false; + (void)ge::AttrUtils::GetBool(out_of_user_data->GetOpDesc(), ATTR_NAME_NODE_CONNECT_INPUT, labeled_input); + if (labeled_input) { + if (!ge::AttrUtils::SetBool(out_of_user_data->GetOpDesc(), ATTR_NAME_NODE_CONNECT_INPUT, false)) { + GELOGE(FAILED, "Failed to unset attr %s for node %s.", ATTR_NAME_NODE_CONNECT_INPUT.c_str(), + out_of_user_data->GetName().c_str()); + return nullptr; + } + if (!ge::AttrUtils::SetBool(op_desc, ATTR_NAME_NODE_CONNECT_INPUT, true)) { + GELOGE(FAILED, "Failed to set attr %s for node %s.", ATTR_NAME_NODE_CONNECT_INPUT.c_str(), + op_desc->GetName().c_str()); + return nullptr; + } + } + + NodePtr memcpy_addr_async_node = graph->AddNodeAfter(op_desc, out_data_anchor->GetOwnerNode()); + GE_CHECK_NOTNULL_EXEC(memcpy_addr_async_node, return nullptr); + + return memcpy_addr_async_node; +} + +Status MemcpyAddrAsyncPass::InsertMemcpyAddrAsyncNode(const OutDataAnchorPtr &out_anchor, + const InDataAnchorPtr &in_anchor, const NodePtr &node) { + // insert memcpy_addr of each user_data and out_of_user_data + if (GraphUtils::RemoveEdge(out_anchor, in_anchor) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Remove edge of %s and %s failed.", out_anchor->GetOwnerNode()->GetName().c_str(), + in_anchor->GetOwnerNode()->GetName().c_str()); + return INTERNAL_ERROR; + } + if (GraphUtils::AddEdge(out_anchor, node->GetInDataAnchor(0)) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add edge of %s and %s failed.", out_anchor->GetOwnerNode()->GetName().c_str(), + node->GetName().c_str()); + return INTERNAL_ERROR; + } + if (GraphUtils::AddEdge(node->GetOutDataAnchor(0), in_anchor) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add edge of %s and %s failed.", node->GetName().c_str(), + 
in_anchor->GetOwnerNode()->GetName().c_str()); + return INTERNAL_ERROR; + } + return SUCCESS; +} + +} // namespace ge diff --git a/src/ge/graph/passes/memcpy_addr_async_pass.h b/src/ge/graph/passes/memcpy_addr_async_pass.h new file mode 100644 index 00000000..9d99e505 --- /dev/null +++ b/src/ge/graph/passes/memcpy_addr_async_pass.h @@ -0,0 +1,51 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_PASSES_MEMCPY_ADDR_ASYNC_PASS_H_ +#define GE_GRAPH_PASSES_MEMCPY_ADDR_ASYNC_PASS_H_ + +#include "inc/graph_pass.h" + +namespace ge { + +class MemcpyAddrAsyncPass : public GraphPass { + public: + Status Run(ComputeGraphPtr graph); + + private: + Status AddMemcpyAddrAsyncNode(const ComputeGraphPtr &graph, const NodePtr &node); + void FindUserData(const NodePtr &node, uint32_t &parent_index); + void FindUserDataForKnown(const NodePtr &parent_node, uint32_t &parent_index); + void FindUserDataForNonDynamic(const ge::NodePtr &parent_node, uint32_t &parent_index); + + NodePtr CreateMemcpyAddrAsyncNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor, + const NodePtr &out_of_user_data); + Status InsertMemcpyAddrAsyncNode(const OutDataAnchorPtr &out_anchor, const InDataAnchorPtr &in_anchor, + const NodePtr &node); + + NodePtr user_data_; + NodePtr out_of_user_data_; + OutDataAnchorPtr peer_out_anchor_; + InDataAnchorPtr in_anchor_; + bool find_user_data_ = false; + NodePtr 
user_data_for_known_; + NodePtr out_of_user_data_for_known_; + OutDataAnchorPtr peer_out_anchor_for_known_; + InDataAnchorPtr in_anchor_for_known_; + bool find_user_data_for_known_ = false; +}; +} // namespace ge +#endif // GE_GRAPH_PASSES_MEMCPY_ADDR_ASYNC_PASS_H_ diff --git a/src/ge/graph/passes/merge_pass.cc b/src/ge/graph/passes/merge_pass.cc index f4114474..8e691518 100644 --- a/src/ge/graph/passes/merge_pass.cc +++ b/src/ge/graph/passes/merge_pass.cc @@ -66,7 +66,7 @@ Status MergePass::Run(NodePtr &node) { AddRePassNode(end_node); } for (const auto &delete_node : del_nodes) { - AddNodeDeleted(delete_node.get()); + AddNodeDeleted(delete_node); } return ret; } diff --git a/src/ge/graph/passes/multi_batch_pass.cc b/src/ge/graph/passes/multi_batch_pass.cc index 26190168..7d484a25 100644 --- a/src/ge/graph/passes/multi_batch_pass.cc +++ b/src/ge/graph/passes/multi_batch_pass.cc @@ -29,6 +29,9 @@ #include "graph/debug/ge_attr_define.h" #include "graph/utils/type_utils.h" +using std::string; +using std::vector; + namespace ge { Status MultiBatchPass::Run(ComputeGraphPtr graph) { GELOGD("MultiBatchPass Enter"); @@ -48,15 +51,21 @@ Status MultiBatchPass::Run(ComputeGraphPtr graph) { return FAILED; } + if (GetDynamicType() != SUCCESS) { + GELOGE(FAILED, "Get dynamic type failed."); + return FAILED; + } + std::vector> batch_shape; - if (!CheckSwitchN(batch_shape)) { + vector> combined_batch; + if (!CheckSwitchN(batch_shape, combined_batch)) { GELOGE(FAILED, "CheckSwitchN failed."); return FAILED; } FindSwitchOutNodes(batch_shape.size()); - if (ReplaceSwitchN(graph, pred_value, batch_shape) != SUCCESS) { + if (ReplaceSwitchN(graph, pred_value, batch_shape, combined_batch) != SUCCESS) { GELOGE(FAILED, "Replace SwitchN nodes failed."); return FAILED; } @@ -119,11 +128,44 @@ Status MultiBatchPass::FindPredValue(const ComputeGraphPtr &graph, OutDataAnchor } /// +/// @brief Get dynamic type: dynamic batch size: 1, dynamic image size: 2, dynamic dims: 3 +/// @return Status 
+/// +Status MultiBatchPass::GetDynamicType() { + for (const auto &switchn : switch_n_nodes_) { + auto switchn_desc = switchn->GetOpDesc(); + GE_CHECK_NOTNULL(switchn_desc); + int32_t dynamic_type = static_cast(FIXED); + if (!AttrUtils::GetInt(switchn_desc, ATTR_DYNAMIC_TYPE, dynamic_type)) { + GELOGE(FAILED, "Get attr ATTR_DYNAMIC_TYPE of node: %s failed.", switchn->GetName().c_str()); + return FAILED; + } + if (dynamic_type == static_cast(FIXED)) { + GELOGE(FAILED, "Attr ATTR_DYNAMIC_TYPE shouldn't be 0."); + return FAILED; + } + if (dynamic_type_ != static_cast(FIXED) && dynamic_type_ != dynamic_type) { + GELOGE(FAILED, "Attr ATTR_DYNAMIC_TYPE of all switchn node should be same, while one is %d and another is %d.", + dynamic_type, dynamic_type_); + return FAILED; + } + dynamic_type_ = dynamic_type; + } + if (dynamic_type_ == static_cast(FIXED)) { + GELOGE(FAILED, "Attr ATTR_DYNAMIC_TYPE shouldn't be 0."); + return FAILED; + } + + return SUCCESS; +} + +/// /// @brief Check SwitchN nodes /// @param [out] batch_shape +/// @param [out] combined_batch /// @return bool /// -bool MultiBatchPass::CheckSwitchN(std::vector> &batch_shape) { +bool MultiBatchPass::CheckSwitchN(vector> &batch_shape, vector> &combined_batch) { // Check if output_num of different SwitchN is same uint32_t batch_num = 0; for (const NodePtr &node : switch_n_nodes_) { @@ -136,22 +178,70 @@ bool MultiBatchPass::CheckSwitchN(std::vector> &batch_shape } } + if (!GetBatchInfo(batch_num, batch_shape, combined_batch)) { + GELOGE(FAILED, "Get batch info failed."); + return false; + } + + if (batch_shape.empty()) { + GELOGE(FAILED, "batch_shape is empty."); + return false; + } + if (combined_batch.empty()) { + GELOGE(FAILED, "combined_batch is empty."); + return false; + } + size_t dim_num = batch_shape[0].size(); + size_t combined_dim_num = combined_batch[0].size(); + for (uint32_t i = 1; i < batch_num; i++) { + size_t tmp_dim_num = batch_shape[i].size(); + if (dim_num != tmp_dim_num) { + GELOGE(FAILED, 
"Dim num of batch_shape not equal, batch_0:%zu, batch_%u:%zu.", dim_num, i, tmp_dim_num); + return false; + } + size_t tmp_combined_dim_num = combined_batch[i].size(); + if (combined_dim_num != tmp_combined_dim_num) { + GELOGE(FAILED, "Dim num of combined_batch not equal, batch_0:%zu, batch_%u:%zu.", dim_num, i, tmp_dim_num); + return false; + } + } + + return true; +} + +/// +/// @brief Check SwitchN nodes +/// @param [in] batch_num +/// @param [out] batch_shape +/// @param [out] combined_batch +/// @return bool +/// +bool MultiBatchPass::GetBatchInfo(uint32_t batch_num, vector> &batch_shape, + vector> &combined_batch) { // Check if output_shape of different SwitchN is same - std::vector> idx_batch_shape; + vector> idx_batch_shape; + vector> idx_combined_batch; for (uint32_t i = 0; i < batch_num; i++) { idx_batch_shape.clear(); + idx_combined_batch.clear(); for (const NodePtr &node : switch_n_nodes_) { - std::vector output_dims; OpDescPtr op_desc = node->GetOpDesc(); if (op_desc == nullptr) { GELOGE(FAILED, "CheckDims failed, get op_desc failed, node: %s.", node->GetName().c_str()); return false; } + vector output_dims; if (!AttrUtils::GetListInt(op_desc->GetOutputDesc(i), ATTR_NAME_SWITCHN_PRED_VALUE, output_dims)) { GELOGE(FAILED, "CheckDims failed, get attr ATTR_NAME_SWITCHN_PRED_VALUE failed, batch_index=%u.", i); return false; } idx_batch_shape.emplace_back(output_dims); + output_dims.clear(); + if (!AttrUtils::GetListInt(op_desc->GetOutputDesc(i), ATTR_NAME_COMBINED_DYNAMIC_DIMS, output_dims)) { + GELOGE(FAILED, "CheckDims failed, get attr ATTR_NAME_COMBINED_DYNAMIC_DIMS failed, batch_index=%u.", i); + return false; + } + idx_combined_batch.emplace_back(output_dims); } if (!CheckDims(idx_batch_shape)) { GELOGE(FAILED, "CheckDims failed, batch_index=%u.", i); @@ -159,22 +249,8 @@ bool MultiBatchPass::CheckSwitchN(std::vector> &batch_shape } batch_shape.emplace_back(idx_batch_shape[0]); + combined_batch.emplace_back(idx_combined_batch[0]); } - - // Check if 
dim_num of different batch is same - if (batch_shape.empty()) { - GELOGE(FAILED, "batch_shape is empty."); - return false; - } - uint32_t dim_num = batch_shape[0].size(); - for (uint32_t i = 1; i < batch_num; i++) { - uint32_t tmp_dim_num = batch_shape[i].size(); - if (dim_num != tmp_dim_num) { - GELOGE(FAILED, "dim_num not equal, batch_0:%u, batch_%u:%u.", dim_num, i, tmp_dim_num); - return false; - } - } - return true; } @@ -206,14 +282,16 @@ void MultiBatchPass::FindSwitchOutNodes(uint32_t batch_num) { /// @param [in] graph /// @param [in] pred_value /// @param [in] batch_shape +/// @param [in] combined_batch /// @return Status /// Status MultiBatchPass::ReplaceSwitchN(const ComputeGraphPtr &graph, const OutDataAnchorPtr &pred_value, - const std::vector> &batch_shape) { + const vector> &batch_shape, + const vector> &combined_batch) { NodePtr pred_value_node = pred_value->GetOwnerNode(); // Create SwitchCase node const std::string &switch_case_name = pred_value_node->GetName() + "_" + STREAMSWITCHN; - NodePtr switch_case = CreateSwitchCaseNode(graph, switch_case_name, pred_value, batch_shape); + NodePtr switch_case = CreateSwitchCaseNode(graph, switch_case_name, pred_value, batch_shape, combined_batch); if (switch_case == nullptr) { GELOGE(FAILED, "CreateSwitchCaseNode %s failed.", switch_case_name.c_str()); return FAILED; @@ -287,11 +365,13 @@ bool MultiBatchPass::CheckDims(const std::vector> &output_s /// @param [in] name /// @param [in] pred_value /// @param [in] batch_shape +/// @param [in] combined_batch /// @return ge::NodePtr /// NodePtr MultiBatchPass::CreateSwitchCaseNode(const ComputeGraphPtr &graph, const std::string &name, const OutDataAnchorPtr &pred_value, - const std::vector> &batch_shape) { + const vector> &batch_shape, + const vector> &combined_batch) { OpDescPtr op_desc = MakeShared(name, STREAMSWITCHN); if (op_desc == nullptr) { GELOGE(FAILED, "Create op_desc failed, StreamSwitchN:%s.", name.c_str()); @@ -320,12 +400,21 @@ NodePtr 
MultiBatchPass::CreateSwitchCaseNode(const ComputeGraphPtr &graph, const GELOGE(FAILED, "set attr ATTR_NAME_BATCH_NUM failed, StreamSwitchN:%s.", name.c_str()); return nullptr; } + if (!AttrUtils::SetInt(op_desc, ATTR_DYNAMIC_TYPE, dynamic_type_)) { + GELOGE(FAILED, "Set attr ATTR_DYNAMIC_TYPE failed, StreamSwitchN:%s.", name.c_str()); + return nullptr; + } for (uint32_t i = 0; i < batch_num; i++) { const std::string &attr_name = ATTR_NAME_PRED_VALUE + "_" + std::to_string(i); if (!AttrUtils::SetListInt(op_desc, attr_name, batch_shape[i])) { GELOGE(FAILED, "set attr ATTR_NAME_PRED_VALUE failed, StreamSwitchN:%s.", name.c_str()); return nullptr; } + const string &attr_combined_batch = ATTR_NAME_COMBINED_BATCH + "_" + std::to_string(i); + if (!AttrUtils::SetListInt(op_desc, attr_combined_batch, combined_batch[i])) { + GELOGE(FAILED, "set attr ATTR_NAME_COMBINED_BATCH failed, StreamSwitchN:%s.", name.c_str()); + return nullptr; + } } return switch_case_node; diff --git a/src/ge/graph/passes/multi_batch_pass.h b/src/ge/graph/passes/multi_batch_pass.h index 2e83262c..8f14ec0a 100644 --- a/src/ge/graph/passes/multi_batch_pass.h +++ b/src/ge/graph/passes/multi_batch_pass.h @@ -29,15 +29,19 @@ class MultiBatchPass : public GraphPass { private: Status FindPredValue(const ComputeGraphPtr &graph, OutDataAnchorPtr &pred_value); - bool CheckSwitchN(std::vector> &batch_shape); + Status GetDynamicType(); + bool CheckSwitchN(std::vector> &batch_shape, std::vector> &combined_batch); + bool GetBatchInfo(uint32_t batch_num, std::vector> &batch_shape, + std::vector> &combined_batch); void FindSwitchOutNodes(uint32_t batch_num); Status ReplaceSwitchN(const ComputeGraphPtr &graph, const OutDataAnchorPtr &pred_value, - const std::vector> &batch_shape); + const std::vector> &batch_shape, + const std::vector> &combined_batch); bool CheckDims(const std::vector> &output_shape) const; NodePtr CreateSwitchCaseNode(const ComputeGraphPtr &graph, const std::string &name, - const OutDataAnchorPtr 
&pred_value, - const std::vector> &batch_shape); + const OutDataAnchorPtr &pred_value, const std::vector> &batch_shape, + const std::vector> &combined_batch); Status BypassSwitchN(const NodePtr &switch_n_node, const NodePtr &switch_case_node); Status AttachLabel(const NodePtr &switch_case_node); Status AttachBatchLabel(uint32_t batch_idx); @@ -46,6 +50,7 @@ class MultiBatchPass : public GraphPass { std::vector switch_n_nodes_; std::vector bypass_nodes_; std::vector> batch_head_nodes_; + int32_t dynamic_type_ = 0; }; } // namespace ge #endif // GE_GRAPH_PASSES_MULTI_BATCH_PASS_H_ diff --git a/src/ge/graph/passes/net_output_pass.cc b/src/ge/graph/passes/net_output_pass.cc index 3c83d8ac..80aa141d 100644 --- a/src/ge/graph/passes/net_output_pass.cc +++ b/src/ge/graph/passes/net_output_pass.cc @@ -22,15 +22,21 @@ #include #include +#include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" -#include "common/ge/ge_util.h" +#include "framework/omg/omg_inner_types.h" +#include "graph/debug/ge_attr_define.h" #include "graph/passes/pass_utils.h" #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" -#include "graph/debug/ge_attr_define.h" namespace ge { +static std::map output_type_str_to_datatype = { + {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"INT8", ge::DT_INT8}, {"INT16", ge::DT_INT16}, + {"UINT16", ge::DT_UINT16}, {"UINT8", ge::DT_UINT8}, {"INT32", ge::DT_INT32}, {"INT64", ge::DT_INT64}, + {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}}; + Status NetOutputPass::GetRetvalOutputInfo(const ge::NodePtr &node, std::map &retval_node_index_map) { GE_CHECK_NOTNULL(node); @@ -135,18 +141,6 @@ Status NetOutputPass::CheckOutputNodeInfo(const ComputeGraphPtr &graph, const st return SUCCESS; } -void NetOutputPass::AddInOutForNetOutputOp(const ge::ComputeGraphPtr &graph, const ge::OpDescPtr &net_output_desc, - const ge::NodePtr &src_node, int32_t src_index) 
{ - /// Get the output attribute of src_node, - /// and set to the input/output of net_out_node. - if (src_node == nullptr || src_node->GetOpDesc() == nullptr || net_output_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "src node or net output desc is null."); - return; - } - ge::GeTensorDesc out_desc = src_node->GetOpDesc()->GetOutputDesc(src_index); - GE_IF_BOOL_EXEC(net_output_desc->AddInputDesc(out_desc) != SUCCESS, GELOGW("add input desc failed"); return ); -} - Status NetOutputPass::RemoveUnusedNode(const ge::ComputeGraphPtr &graph) { std::vector node_to_delete; // Delete _Retval operator. @@ -401,6 +395,7 @@ Status NetOutputPass::ProcessWithNetoutput(const ge::ComputeGraphPtr &graph, con GELOGE(INTERNAL_ERROR, "Update net output desc failed."); return INTERNAL_ERROR; } + if (UnLink(graph, output_node) != SUCCESS) { GELOGE(INTERNAL_ERROR, "UnLink connection between netoutput node and user set target node"); return INTERNAL_ERROR; @@ -430,7 +425,7 @@ Status NetOutputPass::AddCtrlEdgesBetweenLeafAndNetOutput(const ge::ComputeGraph return SUCCESS; } -Status NetOutputPass::CreateNetOutputNode(OpDescPtr &net_output_desc, ge::ComputeGraphPtr &graph) { +Status NetOutputPass::CreateNetOutputNode(OpDescPtr &net_output_desc, const ge::ComputeGraphPtr &graph) { // Only flush subgraph name string node_name = (graph->GetParentGraph() != nullptr) ? (graph->GetName() + "_" + NODE_NAME_NET_OUTPUT) : NODE_NAME_NET_OUTPUT; @@ -451,83 +446,185 @@ Status NetOutputPass::Run(ge::ComputeGraphPtr graph) { } GELOGI("NetOutputPass Run."); NodePtr output_node = graph->FindFirstNodeMatchType(NETOUTPUT); - OpDescPtr net_output_desc = nullptr; - std::vector output_nodes_info; - // save user targets node SaveAndRemoveTargets(graph); // If graph already has a netoutput node, doesn't need to create it again. 
if (output_node != nullptr) { (void)AttrUtils::SetListStr(output_node->GetOpDesc(), ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector())); - return ProcessWithNetoutput(graph, output_node); - } else { - if (CreateNetOutputNode(net_output_desc, graph) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get net output nodes failed."); + if (ProcessWithNetoutput(graph, output_node) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Process with netoutput node failed."); return INTERNAL_ERROR; } - Status ret = GetOutputNode(graph, output_nodes_info); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get net output nodes failed."); + } else { + if (AddNetOutputNodeToGraph(graph, output_node) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Set user define dtype and format for netoutput failed."); return INTERNAL_ERROR; } - GELOGI("[NETOUTPUT PASS] OutNodesInfo size:%zu, Targets Size:%zu, is_include_special_node_:%d", - graph->GetGraphOutNodesInfo().size(), graph->GetGraphTargetNodesInfo().size(), is_include_special_node_); - // If user does not set out nodes and targets and no retval node, return false - bool is_valid = (graph->GetGraphOutNodesInfo().size() == 0) && (graph->GetGraphTargetNodesInfo().size() == 0) && - (is_include_special_node_ == false); - if (is_valid) { - GELOGI("[NETOUTPUT PASS] output_nodes and target_nodes and special nodes is empty!It means no need netoutput!"); - return SUCCESS; - } - GELOGI("[NETOUTPUT PASS] Output node size:%lu.", output_nodes_info.size()); - if (output_nodes_info.empty()) { - // because retval node is contained by output_nodes_info, here means targets is non-empty - auto net_output_node = graph->AddNode(net_output_desc); - if (net_output_node == nullptr) { - GELOGE(INTERNAL_ERROR, "Add output node failed."); - return INTERNAL_ERROR; - } - GE_CHK_STATUS_RET(AddCtrlEdgeForTargets(net_output_node), "add ctrl edge for targets failed"); - // Add true stream, netoutput is 0 - GE_IF_BOOL_EXEC(!ge::AttrUtils::SetInt(net_output_node->GetOpDesc(), 
ATTR_NAME_TRUE_BRANCH_STREAM, 0), - GELOGE(INTERNAL_ERROR, "set ATTR_NAME_TRUE_BRANCH_STREAM failed"); - return INTERNAL_ERROR); - return SUCCESS; - } - std::vector is_input_const; - for (auto iter = output_nodes_info.begin(); iter != output_nodes_info.end();) { - ge::NodePtr src_node = iter->output_node; - if (src_node == nullptr) { - continue; - } - int32_t src_index = iter->node_output_index; - // if src_node is in targets_, no need to Add in and out for netoutput - auto it = targets_.find(src_node); - if (it != targets_.end()) { - iter = output_nodes_info.erase(iter); - GELOGD("node [%s] is in processed targets, do not add inout for netoutput!", src_node->GetName().c_str()); - continue; - } - AddInOutForNetOutputOp(graph, net_output_desc, src_node, src_index); - is_input_const.push_back(PassUtils::IsConstant(src_node)); - ++iter; - } - net_output_desc->SetIsInputConst(is_input_const); + } + // Add userdef attrs to netoutput node + return SetUserDefDTypeAndFormatFromAtcParams(output_node); +} + +Status NetOutputPass::AddNetOutputNodeToGraph(const ge::ComputeGraphPtr &graph, NodePtr &output_node) { + OpDescPtr net_output_desc = nullptr; + if (CreateNetOutputNode(net_output_desc, graph) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Get net output nodes failed."); + return INTERNAL_ERROR; + } + std::vector output_nodes_info; + if (GetOutputNode(graph, output_nodes_info) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Get net output nodes failed."); + return INTERNAL_ERROR; + } + GELOGI("[NETOUTPUT PASS] OutNodesInfo size:%zu, Targets Size:%zu, is_include_special_node_:%d", + graph->GetGraphOutNodesInfo().size(), graph->GetGraphTargetNodesInfo().size(), is_include_special_node_); + // If user does not set out nodes and targets and no retval node, return false + if ((graph->GetGraphOutNodesInfo().empty()) && (graph->GetGraphTargetNodesInfo().empty()) && + !is_include_special_node_) { + GELOGI("[NETOUTPUT PASS] output_nodes and target_nodes and special nodes is empty!It means no 
need netoutput!"); + return SUCCESS; + } + GELOGI("[NETOUTPUT PASS] Output node size:%lu.", output_nodes_info.size()); + if (output_nodes_info.empty()) { + // because retval node is contained by output_nodes_info, here means targets is non-empty output_node = graph->AddNode(net_output_desc); if (output_node == nullptr) { GELOGE(INTERNAL_ERROR, "Add output node failed."); return INTERNAL_ERROR; } - if (AddEdgesForNetOutput(graph, output_node, output_nodes_info) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add edges for net output node failed."); - return INTERNAL_ERROR; + GE_CHK_STATUS_RET(AddCtrlEdgeForTargets(output_node), "add ctrl edge for targets failed"); + // Add true stream, netoutput is 0 + GE_IF_BOOL_EXEC(!ge::AttrUtils::SetInt(output_node->GetOpDesc(), ATTR_NAME_TRUE_BRANCH_STREAM, 0), + GELOGE(INTERNAL_ERROR, "set ATTR_NAME_TRUE_BRANCH_STREAM failed"); + return INTERNAL_ERROR); + return SUCCESS; + } + + AddInOutForNetOutputOp(graph, net_output_desc, output_nodes_info); + output_node = graph->AddNode(net_output_desc); + if (output_node == nullptr) { + GELOGE(INTERNAL_ERROR, "Add output node failed."); + return INTERNAL_ERROR; + } + if (AddEdgesForNetOutput(graph, output_node, output_nodes_info) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add edges for net output node failed."); + return INTERNAL_ERROR; + } + if (AddCtrlEdgesBetweenLeafAndNetOutput(graph, output_node) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add control edges between leaf and netoutput failed."); + return INTERNAL_ERROR; + } + GELOGI("Add NetOutput node success."); + return SUCCESS; +} +void NetOutputPass::AddInOutForNetOutputOp(const ComputeGraphPtr &graph, OpDescPtr &net_output_desc, + vector &output_nodes_info) { + std::vector is_input_const; + for (auto iter = output_nodes_info.begin(); iter != output_nodes_info.end();) { + NodePtr src_node = iter->output_node; + if (src_node == nullptr) { + continue; } - if (AddCtrlEdgesBetweenLeafAndNetOutput(graph, output_node) != SUCCESS) { - 
GELOGE(INTERNAL_ERROR, "Add control edges between leaf and netoutput failed."); - return INTERNAL_ERROR; + int32_t src_index = iter->node_output_index; + // if src_node is in targets_, no need to Add in and out for netoutput + auto it = targets_.find(src_node); + if (it != targets_.end()) { + iter = output_nodes_info.erase(iter); + GELOGD("node [%s] is in processed targets, do not add inout for netoutput!", src_node->GetName().c_str()); + continue; + } + /// Get the output attribute of src_node, + /// and set to the input/output of net_out_node. + if (src_node == nullptr || src_node->GetOpDesc() == nullptr || net_output_desc == nullptr) { + GELOGE(INTERNAL_ERROR, "src node or net output desc is null."); + return; + } + ge::GeTensorDesc out_desc = src_node->GetOpDesc()->GetOutputDesc(src_index); + GE_IF_BOOL_EXEC(net_output_desc->AddInputDesc(out_desc) != SUCCESS, GELOGW("add input desc failed"); return ); + is_input_const.push_back(PassUtils::IsConstant(src_node)); + ++iter; + } + net_output_desc->SetIsInputConst(is_input_const); +} + +bool NeedUpdateOutputByOutputTypeParm(std::string &output_type, NodePtr &src_node, uint32_t src_index, + ge::DataType &dt) { + if (output_type_str_to_datatype.find(output_type) != output_type_str_to_datatype.end()) { + dt = output_type_str_to_datatype[output_type]; + return true; + } + + auto op_desc = src_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + vector output_data_type_vec; + vector index_vec; + if ((ge::AttrUtils::GetListDataType(op_desc, "_output_dt_list", output_data_type_vec)) && + (ge::AttrUtils::GetListInt(op_desc, "_output_dt_index", index_vec))) { + if (output_data_type_vec.size() != index_vec.size()) { + GELOGW("output_dt_list size is not match output_dt_index size"); + return false; + } + for (uint32_t i = 0; i < index_vec.size(); ++i) { + if (index_vec[i] == src_index) { + dt = output_data_type_vec[i]; + return true; + } } - GELOGI("Add NetOutput node success."); + } + return false; +} + +Status 
NetOutputPass::SetUserDefDTypeAndFormatFromAtcParams(const NodePtr &output_node) { + if (output_node == nullptr) { + GELOGI("[NETOUTPUT PASS] The graph no need netoutput node!"); + return SUCCESS; + } + auto output_type = domi::GetContext().output_type; + auto op_desc = output_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + std::vector userdef_dtypes; + std::vector userdef_formats; + + ge::DataType output_data_type = ge::DT_FLOAT; + for (const auto &in_anchor : output_node->GetAllInDataAnchors()) { + auto index = static_cast(in_anchor->GetIdx()); + auto peer_out = in_anchor->GetPeerOutAnchor(); + if (peer_out == nullptr) { + // If user set target, peer_out anchor will be unlinked. + continue; + } + auto src_index = static_cast(peer_out->GetIdx()); + auto src_node = peer_out->GetOwnerNode(); + GE_CHECK_NOTNULL(src_node); + + // Update datatype + if (NeedUpdateOutputByOutputTypeParm(output_type, src_node, src_index, output_data_type)) { + GELOGD("Add user-define datatype:%s to netoutput node.", + TypeUtils::DataTypeToSerialString(output_data_type).c_str()); + userdef_dtypes.push_back( + std::to_string(index).append(":").append(TypeUtils::DataTypeToSerialString(output_data_type))); + continue; + } + // Output_node is not set,check if is_output_adjust_hw_layout is set + OpDescPtr src_op_desc = src_node->GetOpDesc(); + GE_CHECK_NOTNULL(src_op_desc); + bool set_fp16_nc1hwc0 = false; + (void)AttrUtils::GetBool(src_op_desc, "output_set_fp16_nc1hwc0", set_fp16_nc1hwc0); + if (set_fp16_nc1hwc0) { + // Set DT_FLOAT16 & FORMAT_NC1HWC0 + userdef_dtypes.push_back(std::to_string(index).append(":").append(TypeUtils::DataTypeToSerialString(DT_FLOAT16))); + userdef_formats.push_back( + std::to_string(index).append(":").append(TypeUtils::FormatToSerialString(FORMAT_NC1HWC0))); + } + } + if (!userdef_dtypes.empty() && !ge::AttrUtils::SetListStr(op_desc, ATTR_ATC_USER_DEFINE_DATATYPE, userdef_dtypes)) { + GELOGE(INTERNAL_ERROR, "Set user_define_dtype attr list for netoutput 
failed."); + return INTERNAL_ERROR; + } + if (!userdef_formats.empty() && !ge::AttrUtils::SetListStr(op_desc, ATTR_ATC_USER_DEFINE_FORMAT, userdef_formats)) { + GELOGE(INTERNAL_ERROR, "Set user_define_format attr list for netoutput failed."); + return INTERNAL_ERROR; } return SUCCESS; } diff --git a/src/ge/graph/passes/net_output_pass.h b/src/ge/graph/passes/net_output_pass.h index 5edf24fc..567d1246 100644 --- a/src/ge/graph/passes/net_output_pass.h +++ b/src/ge/graph/passes/net_output_pass.h @@ -73,7 +73,7 @@ class NetOutputPass : public GraphPass { /// @return OTHERS: Execution failed /// @author /// - Status CreateNetOutputNode(OpDescPtr &net_output_desc, ge::ComputeGraphPtr &graph); + Status CreateNetOutputNode(OpDescPtr &net_output_desc, const ge::ComputeGraphPtr &graph); /// /// Check if the network output node is legal @@ -89,13 +89,12 @@ class NetOutputPass : public GraphPass { /// Set input and output for the NetOutput node /// @param [in] graph: Input ComputeGraph /// @param [in] net_output_desc: OpDesc of the NetOutput node - /// @param [in] src_node: Source node of the NetOutput - /// @param [in] src_index: Output index of the Source node + /// @param [in] output_nodes_info: RetvalInfos of the NetOutput /// @return void /// @author /// - void AddInOutForNetOutputOp(const ge::ComputeGraphPtr &graph, const ge::OpDescPtr &net_output_desc, - const ge::NodePtr &src_node, int32_t src_index); + void AddInOutForNetOutputOp(const ComputeGraphPtr &graph, OpDescPtr &net_output_desc, + vector &output_nodes_info); /// /// Delete unwanted _Retval/Save/Summary nodes @@ -199,6 +198,25 @@ class NetOutputPass : public GraphPass { /// bool CheckNodeIsInOutputNodes(const ge::ComputeGraphPtr &graph, const ge::NodePtr &node); + /// + /// Add netoutput node to graph with output node infos + /// @param [in] graph: ComputeGraph + /// @param [in] output_node: shared_ptr to netoutput node + /// @return SUCCESS: Execution succeed + /// @return OTHERS: Execution failed + /// 
@author + /// + Status AddNetOutputNodeToGraph(const ge::ComputeGraphPtr &graph, NodePtr &output_node); + + /// + /// Add user_def_dtype & format for netoutput node + /// @param [in] output_node: The netOutput node + /// @return SUCCESS: Execution succeed + /// @return OTHERS: Execution failed + /// @author + /// + Status SetUserDefDTypeAndFormatFromAtcParams(const ge::NodePtr &output_node); + bool is_include_special_node_ = false; std::set targets_; friend class ReUpdateNetOutputPass; diff --git a/src/ge/graph/passes/next_iteration_pass.cc b/src/ge/graph/passes/next_iteration_pass.cc index c664ac53..12cde11e 100644 --- a/src/ge/graph/passes/next_iteration_pass.cc +++ b/src/ge/graph/passes/next_iteration_pass.cc @@ -104,26 +104,28 @@ Status NextIterationPass::FindWhileGroups() { GELOGE(INTERNAL_ERROR, "Get NextIteration node failed, frame_name: %s.", frame_name.c_str()); return INTERNAL_ERROR; } + loop_group_iter.second->merge_next_pairs.emplace_back(std::make_pair(out_node, next_node)); NodePtr switch_node = nullptr; if (FindTargetNode(out_node, SWITCH, false, switch_node) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Get Switch node failed, frame_name: %s.", frame_name.c_str()); return INTERNAL_ERROR; } + if (switch_node == nullptr) { + continue; + } NodePtr loop_cond = nullptr; if (FindTargetNode(switch_node, LOOPCOND, true, loop_cond) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Get LoopCond node failed, frame_name: %s.", frame_name.c_str()); return INTERNAL_ERROR; } - if (loop_group_iter.second->loop_cond == nullptr) { loop_group_iter.second->loop_cond = loop_cond; } else if (loop_group_iter.second->loop_cond != loop_cond) { GELOGE(FAILED, "Multi LoopCond nodes exist, frame_name: %s.", frame_name.c_str()); return FAILED; } - loop_group_iter.second->merge_next_pairs.emplace_back(std::make_pair(out_node, next_node)); } } } @@ -311,7 +313,7 @@ Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string } } - if (target_node == nullptr) { + if ((target_type 
!= SWITCH) && (target_node == nullptr)) { GELOGE(INTERNAL_ERROR, "Find node %s failed.", target_type.c_str()); return INTERNAL_ERROR; } diff --git a/src/ge/graph/passes/resource_pair_add_control_pass.cc b/src/ge/graph/passes/resource_pair_add_control_pass.cc index c5be9600..bba8ee71 100644 --- a/src/ge/graph/passes/resource_pair_add_control_pass.cc +++ b/src/ge/graph/passes/resource_pair_add_control_pass.cc @@ -28,7 +28,6 @@ #include "graph/utils/tensor_adapter.h" namespace { -const char *const kSeparate = "/"; const std::map kResourcePairType = {{"StackPush", "StackPop"}}; const std::set kResourceTypes = {"StackPush", "StackPop"}; } // namespace @@ -41,15 +40,16 @@ Status ResourcePairAddControlPass::Run(ComputeGraphPtr graph) { // find all node of condition type, store with type and scope prefix key for (auto &node : graph->GetDirectNode()) { GE_CHECK_NOTNULL(node); - if (kResourceTypes.find(node->GetType()) != kResourceTypes.end()) { + auto node_type = node->GetType(); + if (kResourceTypes.find(node_type) != kResourceTypes.end()) { std::string node_name = node->GetName(); - std::string node_prefix; - size_t last_separate_index = node_name.find_last_of(kSeparate); - if (last_separate_index != std::string::npos) { - node_prefix = node_name.substr(0, last_separate_index); + std::string node_key(node_name); + std::size_t found = node_name.rfind(node_type); + if (found != std::string::npos) { + node_key.replace(found, node_type.size(), ""); } - prefix_2_node_per_type[node->GetType()][node_prefix] = node; - GELOGD("ResourcePairAddControlPass insert prefix:%s, op_name:%s, op_type:%s", node_prefix.c_str(), + prefix_2_node_per_type[node_type][node_key] = node; + GELOGD("ResourcePairAddControlPass insert node_key:%s, op_name:%s, op_type:%s", node_key.c_str(), node_name.c_str(), node->GetType().c_str()); } } diff --git a/src/ge/graph/passes/resource_pair_remove_control_pass.cc b/src/ge/graph/passes/resource_pair_remove_control_pass.cc index de3537f0..00d97798 100644 --- 
a/src/ge/graph/passes/resource_pair_remove_control_pass.cc +++ b/src/ge/graph/passes/resource_pair_remove_control_pass.cc @@ -28,7 +28,6 @@ #include "graph/utils/tensor_adapter.h" namespace { -const char *const kSeparate = "/"; const std::map kResourcePairType = {{"StackPush", "StackPop"}}; const std::set kResourceTypes = {"StackPush", "StackPop"}; } // namespace @@ -41,15 +40,16 @@ Status ResourcePairRemoveControlPass::Run(ComputeGraphPtr graph) { // find all node of condition type, store with type and scope prefix key for (auto &node : graph->GetDirectNode()) { GE_CHECK_NOTNULL(node); - if (kResourceTypes.find(node->GetType()) != kResourceTypes.end()) { + auto node_type = node->GetType(); + if (kResourceTypes.find(node_type) != kResourceTypes.end()) { std::string node_name = node->GetName(); - std::string node_prefix; - size_t last_separate_index = node_name.find_last_of(kSeparate); - if (last_separate_index != std::string::npos) { - node_prefix = node_name.substr(0, last_separate_index); + std::string node_key(node_name); + std::size_t found = node_name.rfind(node_type); + if (found != std::string::npos) { + node_key.replace(found, node_type.size(), ""); } - prefix_2_node_per_type[node->GetType()][node_prefix] = node; - GELOGD("ResourcePairRemoveControlPass insert prefix:%s, op_name:%s, op_type:%s", node_prefix.c_str(), + prefix_2_node_per_type[node_type][node_key] = node; + GELOGD("ResourcePairRemoveControlPass insert node_key:%s, op_name:%s, op_type:%s", node_key.c_str(), node_name.c_str(), node->GetType().c_str()); } } diff --git a/src/ge/graph/passes/set_input_output_offset_pass.cc b/src/ge/graph/passes/set_input_output_offset_pass.cc new file mode 100644 index 00000000..b4b34ecd --- /dev/null +++ b/src/ge/graph/passes/set_input_output_offset_pass.cc @@ -0,0 +1,231 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/passes/set_input_output_offset_pass.h" + +#include "runtime/mem.h" + +namespace ge { +Status SetInputOutputOffsetPass::Run(ComputeGraphPtr graph) { + GE_CHECK_NOTNULL(graph); + for (auto &node : graph->GetDirectNode()) { + GE_CHECK_NOTNULL(node->GetOpDesc()); + vector connect_input; + (void)AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_NODE_CONNECT_INPUT, connect_input); + if (!connect_input.empty()) { + Status ret = SetInputOffset(graph, node); + if (ret != SUCCESS) { + GELOGE(ret, "SetInputOffset failed."); + return ret; + } + } + vector connect_output; + (void)AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_NODE_CONNECT_OUTPUT, connect_output); + if (!connect_output.empty()) { + Status ret = SetOutputOffset(graph, node, connect_output); + if (ret != SUCCESS) { + GELOGE(ret, "SetOutputOffset failed."); + return ret; + } + } + } + return SUCCESS; +} + +Status SetInputOutputOffsetPass::SetInputOffset(const ComputeGraphPtr &graph, const NodePtr &node) { + GELOGI("Start to SetInputOffset for %s.", node->GetName().c_str()); + std::vector memory_type; + auto op_desc = node->GetOpDesc(); + (void)ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type); + if (!memory_type.empty()) { + for (size_t i = 0; i < memory_type.size(); ++i) { + if (memory_type.at(i) != RT_MEMORY_L1) { + std::vector input_offset_of_node; + input_offset_of_node = op_desc->GetInputOffset(); + if (input_offset_of_node.size() < i) { + GELOGE(PARAM_INVALID, "not get input_offset of %zu", i); + return PARAM_INVALID; + } + 
int64_t input_offset = input_offset_of_node.at(i); + GELOGI("input_offset of %s is %ld.", node->GetName().c_str(), input_offset); + auto in_anchor = node->GetInDataAnchor(i); + GE_IF_BOOL_EXEC(in_anchor == nullptr, continue); + auto peer_out_anchor = in_anchor->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); + int out_index = peer_out_anchor->GetIdx(); + auto data_op_desc = peer_out_anchor->GetOwnerNode()->GetOpDesc(); + GE_CHECK_NOTNULL(data_op_desc); + int64_t out_offset = data_op_desc->GetOutputOffset().at(out_index); + GELOGI("output_offset of %s is %ld.", peer_out_anchor->GetOwnerNode()->GetName().c_str(), out_offset); + vector zero_copy_basic_offset; + vector zero_copy_relative_offset; + + (void)ge::AttrUtils::GetListInt(data_op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset); + (void)ge::AttrUtils::GetListInt(data_op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset); + zero_copy_basic_offset.emplace_back(out_offset); + int64_t relative_offset = input_offset - out_offset; + zero_copy_relative_offset.emplace_back(relative_offset); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(data_op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset), + GELOGE(FAILED, "SetListInt of zero_copy_basic_offset failed."); + return FAILED); + GE_CHK_BOOL_EXEC( + ge::AttrUtils::SetListInt(data_op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset), + GELOGE(FAILED, "SetListInt of zero_copy_relative_offset failed."); + return FAILED); + } + } + } + // Data->Hcom + bool is_input_continuous = false; + bool get_input_continuous = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + if (get_input_continuous && is_input_continuous) { + GELOGW("current not support set offset for %s.", node->GetName().c_str()); + } + return SUCCESS; +} + +Status SetInputOutputOffsetPass::SetOutputOffsetForConcat(const ComputeGraphPtr &graph, const NodePtr &node) { + GELOGI("Start SetOutputOffsetForConcat 
for %s.", node->GetName().c_str()); + auto op_desc = node->GetOpDesc(); + std::vector output_offset_of_concat; + output_offset_of_concat = op_desc->GetOutputOffset(); + // phony_concat has one output + GE_IF_BOOL_EXEC(output_offset_of_concat.size() != 1, + GELOGE(PARAM_INVALID, "%s should has one output.", node->GetName().c_str()); + return PARAM_INVALID); + NodePtr net_output = node->GetOutDataNodes().at(0); + auto out_op_desc = net_output->GetOpDesc(); + GE_CHECK_NOTNULL(out_op_desc); + vector zero_copy_basic_offset; + vector zero_copy_relative_offset; + (void)ge::AttrUtils::GetListInt(out_op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset); + (void)ge::AttrUtils::GetListInt(out_op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset); + + int64_t basic_offset = output_offset_of_concat.at(0); + GELOGI("output_offset of %s is %ld.", op_desc->GetName().c_str(), basic_offset); + for (InDataAnchorPtr &in_anchor : node->GetAllInDataAnchors()) { + OutDataAnchorPtr peer_out_anchor = in_anchor->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); + NodePtr in_node = peer_out_anchor->GetOwnerNode(); + auto out_index = peer_out_anchor->GetIdx(); + std::vector output_offset_of_in_node; + GE_CHECK_NOTNULL(in_node->GetOpDesc()); + output_offset_of_in_node = in_node->GetOpDesc()->GetOutputOffset(); + GELOGI("input offset from %s to %s is %ld.", in_node->GetName().c_str(), op_desc->GetName().c_str(), + output_offset_of_in_node.at(out_index)); + int64_t relative_offset = output_offset_of_in_node.at(out_index) - basic_offset; + zero_copy_basic_offset.emplace_back(basic_offset); + zero_copy_relative_offset.emplace_back(relative_offset); + } + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(out_op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset), + GELOGE(FAILED, "SetListInt of zero_copy_basic_offset failed."); + return FAILED); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(out_op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, 
zero_copy_relative_offset), + GELOGE(FAILED, "SetListInt of zero_copy_relative_offset failed."); + return FAILED); + return SUCCESS; +} + +Status SetInputOutputOffsetPass::SetOutputOffsetForHcom(const ComputeGraphPtr &graph, const NodePtr &node, + const vector &connect_output) { + GELOGI("Start SetOutputOffsetForHcom, %s connect with %zu output.", node->GetName().c_str(), connect_output.size()); + vector output_offset_of_node; + output_offset_of_node = node->GetOpDesc()->GetOutputOffset(); + int connect_output_index = connect_output.at(0); + int64_t basic_offset = output_offset_of_node.at(connect_output_index); + GELOGI("basic_offset of %s is %ld.", node->GetName().c_str(), basic_offset); + + NodePtr net_output = node->GetOutDataNodes().at(connect_output_index); + auto out_op_desc = net_output->GetOpDesc(); + GE_CHECK_NOTNULL(out_op_desc); + vector zero_copy_basic_offset; + vector zero_copy_relative_offset; + (void)ge::AttrUtils::GetListInt(out_op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset); + (void)ge::AttrUtils::GetListInt(out_op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset); + + for (auto &out_anchor : node->GetAllOutDataAnchors()) { + GE_IF_BOOL_EXEC(out_anchor == nullptr, continue); + for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { + GE_IF_BOOL_EXEC(in_anchor == nullptr, continue); + NodePtr out_node = in_anchor->GetOwnerNode(); + auto in_index = in_anchor->GetIdx(); + std::vector input_offset_of_out_node; + GE_CHECK_NOTNULL(out_node->GetOpDesc()); + input_offset_of_out_node = out_node->GetOpDesc()->GetInputOffset(); + GELOGI("input offset from %s to %s is %ld.", node->GetName().c_str(), out_node->GetName().c_str(), + input_offset_of_out_node.at(in_index)); + int64_t relative_offset = input_offset_of_out_node.at(in_index) - basic_offset; + zero_copy_basic_offset.emplace_back(basic_offset); + zero_copy_relative_offset.emplace_back(relative_offset); + } + } + + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(out_op_desc, 
ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset), + GELOGE(FAILED, "SetListInt of zero_copy_basic_offset failed."); + return FAILED); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(out_op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset), + GELOGE(FAILED, "SetListInt of zero_copy_relative_offset failed."); + return FAILED); + return SUCCESS; +} + +Status SetInputOutputOffsetPass::SetOutputOffset(const ComputeGraphPtr &graph, const NodePtr &node, + const vector &connect_output) { + GELOGI("Start SetOutputOffset of %s.", node->GetName().c_str()); + bool attr_no_task = false; + bool get_attr_no_task = ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_NOTASK, attr_no_task); + if (get_attr_no_task && attr_no_task) { + bool is_input_continuous = false; + (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + bool buffer_fusion = CheckBufferFusion(node); + // A/B/C -> Phony_concat -> Netoutput : input_continuous + if (is_input_continuous || buffer_fusion) { + Status ret = SetOutputOffsetForConcat(graph, node); + if (ret != SUCCESS) { + GELOGE(ret, "SetOutputOffsetForConcat failed."); + return ret; + } + } + } + /* + // allreduce->netoutput : output_continuous + bool is_output_continuous = false; + bool get_output_continuous = + ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous); + if (get_output_continuous && is_output_continuous) { + Status ret = SetOutputOffsetForHcom(graph, node, connect_output); + if (ret != SUCCESS) { + GELOGE(ret, "SetOutputOffsetForHcom failed."); + return ret; + } + } + */ + return SUCCESS; +} + +bool SetInputOutputOffsetPass::CheckBufferFusion(const NodePtr &node) { + for (auto &in_node : node->GetInDataNodes()) { + GE_CHECK_NOTNULL(in_node); + auto op_desc = in_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + if (!op_desc->HasAttr(ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION)) { + GELOGI("The node: %s not have 
ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION.", node->GetName().c_str()); + return false; + } + } + return true; +} +} // namespace ge \ No newline at end of file diff --git a/src/ge/graph/passes/set_input_output_offset_pass.h b/src/ge/graph/passes/set_input_output_offset_pass.h new file mode 100644 index 00000000..044fbd9e --- /dev/null +++ b/src/ge/graph/passes/set_input_output_offset_pass.h @@ -0,0 +1,36 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_GRAPH_PASSES_SET_INPUT_OUTPUT_OFFSET_PASS_H_ +#define GE_GRAPH_PASSES_SET_INPUT_OUTPUT_OFFSET_PASS_H_ + +#include "inc/graph_pass.h" + +namespace ge { +class SetInputOutputOffsetPass : public GraphPass { + public: + Status Run(ComputeGraphPtr graph) override; + + private: + Status SetInputOffset(const ComputeGraphPtr &graph, const NodePtr &node); + Status SetOutputOffset(const ComputeGraphPtr &graph, const NodePtr &node, const vector &connect_output); + Status SetOutputOffsetForConcat(const ComputeGraphPtr &graph, const NodePtr &node); + Status SetOutputOffsetForHcom(const ComputeGraphPtr &graph, const NodePtr &node, const vector &connect_output); + + bool CheckBufferFusion(const NodePtr &node); +}; +} // namespace ge +#endif // GE_GRAPH_PASSES_SET_INPUT_OUTPUT_OFFSET_PASS_H_ diff --git a/src/ge/graph/passes/switch_dead_branch_elimination.cc b/src/ge/graph/passes/switch_dead_branch_elimination.cc index f398d8df..dd7ace60 100644 --- a/src/ge/graph/passes/switch_dead_branch_elimination.cc +++ b/src/ge/graph/passes/switch_dead_branch_elimination.cc @@ -171,7 +171,7 @@ Status SwitchDeadBranchElimination::Run(NodePtr &node) { AddRePassNode(end_node); } for (const auto &delete_node : del_nodes) { - AddNodeDeleted(delete_node.get()); + AddNodeDeleted(delete_node); } } diff --git a/src/ge/graph/passes/switch_logic_remove_pass.cc b/src/ge/graph/passes/switch_logic_remove_pass.cc index be84a582..dafa3ae1 100644 --- a/src/ge/graph/passes/switch_logic_remove_pass.cc +++ b/src/ge/graph/passes/switch_logic_remove_pass.cc @@ -145,7 +145,7 @@ Status SwitchLogicRemovePass::RemoveSwitchNodeLogically(int parent_index, NodePt GE_CHECK_NOTNULL(node); GELOGD("Remove node %s from inactivate branch from switch %s", node->GetName().c_str(), switch_node->GetName().c_str()); - AddNodeDeleted(node.get()); + AddNodeDeleted(node); } for (auto &node : end_nodes) { GE_CHECK_NOTNULL(node); diff --git a/src/ge/graph/passes/transop_symmetry_elimination_pass.cc 
b/src/ge/graph/passes/transop_symmetry_elimination_pass.cc index 2ff7cd82..887079f8 100644 --- a/src/ge/graph/passes/transop_symmetry_elimination_pass.cc +++ b/src/ge/graph/passes/transop_symmetry_elimination_pass.cc @@ -15,21 +15,26 @@ */ #include "transop_symmetry_elimination_pass.h" +#include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" #include "graph/common/transop_util.h" +#include "graph/debug/ge_attr_define.h" #include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" #include "graph/utils/type_utils.h" +#include "types.h" namespace { const int kTransOpOutIndex = 0; -static std::map precision_loss_transfer_map = {{ge::DT_FLOAT, ge::DT_BOOL}}; +const std::set white_list_op{ge::TRANSPOSED, ge::RESHAPE, ge::REFORMAT, ge::CAST, ge::TRANSDATA}; +std::map precision_loss_transfer_map = {{ge::DT_FLOAT, ge::DT_BOOL}}; } // namespace namespace ge { Status TransOpSymmetryEliminationPass::Run(NodePtr &node) { GE_CHECK_NOTNULL(node); GE_CHECK_NOTNULL(node->GetOpDesc()); - if (!TransOpUtil::IsTransOp(node)) { + if (white_list_op.find(node->GetType()) == white_list_op.end()) { return SUCCESS; } GELOGD("Symmetry Elimination Pass in."); @@ -40,9 +45,8 @@ Status TransOpSymmetryEliminationPass::Run(NodePtr &node) { GE_CHECK_NOTNULL(peer_in_anchor->GetOwnerNode()); GE_CHECK_NOTNULL(peer_in_anchor->GetOwnerNode()->GetOpDesc()); if (!CheckCanBeEliminated(node, peer_in_anchor)) { - break; + continue; } - auto dst_node = peer_in_anchor->GetOwnerNode(); Status ret = EliminateTransOp(node, out_anchor, dst_node, peer_in_anchor); if (ret != SUCCESS) { @@ -70,12 +74,33 @@ bool TransOpSymmetryEliminationPass::CheckCanBeEliminated(const ge::NodePtr &src dst_node->GetType().c_str(), dst_in_anchor->GetIdx()); return false; } - if (!DescAreSymmetry(src_node, dst_node) || !CheckPrecisionLoss(src_node)) { - GELOGD("Not satisfied symmetry or has precision loss, ignore pass."); - return false; + if 
(src_node->GetType() == ge::RESHAPE) { + GE_CHECK_NOTNULL(src_node->GetOpDesc()); + auto unknown_dims_num = GetUnknownDimsNum(src_node->GetOpDesc()->GetInputDesc(0)); + if (unknown_dims_num != 0 && (unknown_dims_num == UNKNOWN_DIM_NUM || unknown_dims_num > 1)) { + GELOGD( + "Pre node %s is reshape op which input is dynamic shape and has more than one unknown dimension. " + "Ignore pass.", + src_node->GetName().c_str()); + return false; + } + } else if (src_node->GetType() == ge::TRANSPOSED) { + if (!JudgeTransposeDBack2Raw(src_node, dst_node)) { + GELOGD("Two Transpose op src node %s dst node %s will change the raw data. Ignore pass.", + src_node->GetName().c_str(), dst_node->GetName().c_str()); + return false; + } + } else if (src_node->GetType() == ge::TRANSDATA) { + auto unknown_dims_num = GetUnknownDimsNum(src_node->GetOpDesc()->GetInputDesc(0)); + if (unknown_dims_num == UNKNOWN_DIM_NUM) { + GELOGD("Pre node %s is transdata op which input is dynamic shape and all dimension are unknown(-2). 
Ignore pass.", + src_node->GetName().c_str()); + return false; + } } - return true; + return CheckPrecisionLoss(src_node) && DescAreSymmetry(src_node, dst_node); } + bool TransOpSymmetryEliminationPass::DescAreSymmetry(const NodePtr &src_node, const NodePtr &dst_node) { const auto &src_input_desc = src_node->GetOpDesc()->MutableInputDesc(0); const auto &dst_output_desc = dst_node->GetOpDesc()->MutableOutputDesc(0); @@ -88,15 +113,28 @@ bool TransOpSymmetryEliminationPass::DescAreSymmetry(const NodePtr &src_node, co const auto &dst_output_format = dst_output_desc->GetFormat(); const auto &dst_output_shape = dst_output_desc->GetShape().GetDims(); + bool is_symmetry = true; if (src_node->GetType() == CAST && dst_node->GetType() == CAST) { bool is_format_symmetry = (src_input_format == dst_output_format) || (dst_output_format == FORMAT_ND) || (src_input_format == FORMAT_ND); - return (src_input_dtype == dst_output_dtype) && is_format_symmetry; + is_symmetry = (src_input_dtype == dst_output_dtype) && is_format_symmetry; } else { - return (src_input_dtype == dst_output_dtype) && (src_input_shape == dst_output_shape) && - (src_input_format == dst_output_format); + is_symmetry = (src_input_dtype == dst_output_dtype) && (src_input_shape == dst_output_shape) && + (src_input_format == dst_output_format); } + if (!is_symmetry) { + GELOGD( + "Not satisfied symmetry. ignore pass.\n" + "Src node %s input type: %s format: %s shape: %s, " + "dst node %s output type: %s format: %s shape: %s. 
", + src_node->GetName().c_str(), TypeUtils::DataTypeToSerialString(src_input_dtype).c_str(), + TypeUtils::FormatToSerialString(src_input_format).c_str(), formats::ShapeToString(src_input_shape).c_str(), + dst_node->GetName().c_str(), TypeUtils::DataTypeToSerialString(dst_output_dtype).c_str(), + TypeUtils::FormatToSerialString(dst_output_format).c_str(), formats::ShapeToString(dst_output_shape).c_str()); + } + return is_symmetry; } + bool TransOpSymmetryEliminationPass::CheckPrecisionLoss(const ge::NodePtr &src_node) { auto idx = TransOpUtil::GetTransOpDataIndex(src_node); auto input_desc = src_node->GetOpDesc()->GetInputDesc(idx); @@ -105,10 +143,59 @@ bool TransOpSymmetryEliminationPass::CheckPrecisionLoss(const ge::NodePtr &src_n auto dst_dtype = output_desc.GetDataType(); auto iter = precision_loss_transfer_map.find(src_dtype); if (iter != precision_loss_transfer_map.end() && iter->second == dst_dtype) { - GELOGW("Node %s transfer data type from %s to %s ,it will cause precision loss.", src_node->GetName().c_str(), - TypeUtils::DataTypeToSerialString(src_dtype).c_str(), TypeUtils::DataTypeToSerialString(dst_dtype).c_str()); + GELOGW("Node %s transfer data type from %s to %s ,it will cause precision loss. 
ignore pass.", + src_node->GetName().c_str(), TypeUtils::DataTypeToSerialString(src_dtype).c_str(), + TypeUtils::DataTypeToSerialString(dst_dtype).c_str()); + return false; + } + return true; +} + +int TransOpSymmetryEliminationPass::GetUnknownDimsNum(const GeTensorDesc &node_desc) { + // + // unknown_dims_num != 0 , is dynamic shape + // unknown_dims_num = UNKNOWN_DIM_NUM , all dims are unknown + // unknown_dims_num = n , n > 0 , has n dims unknown + // + int unknown_dims_num = 0; + auto ge_shape = node_desc.GetShape(); + for (const auto dim : ge_shape.GetDims()) { + if (dim == UNKNOWN_DIM_NUM) { + return UNKNOWN_DIM_NUM; + } + if (dim == UNKNOWN_DIM) { + ++unknown_dims_num; + } + } + return unknown_dims_num; +} + +bool TransOpSymmetryEliminationPass::JudgeTransposeDBack2Raw(const NodePtr &src_node, const NodePtr &dst_node) { + // + // A transpose to C : A---->(perm_1)---->B---->(perm_2)---->C + // we want to judge A is equal with C or not + // suppose A = C then: + // 1. B[i] = A[perm_1[i]] + // 2. C[i] = B[perm_2[i]] + // 3. 
combine 1 and 2 then: C[i] = A[perm_1[perm_2[i]]] + // which we get through 3: i = perm_1[perm_2[i]] + // + vector src_node_perm; + AttrUtils::GetListInt(src_node->GetOpDesc(), ge::PERMUTE_ATTR_PERM, src_node_perm); + vector dst_node_perm; + AttrUtils::GetListInt(dst_node->GetOpDesc(), ge::PERMUTE_ATTR_PERM, dst_node_perm); + + if (src_node_perm.size() != dst_node_perm.size()) { return false; } + for (size_t src_index = 0; src_index < src_node_perm.size(); ++src_index) { + if (dst_node_perm[src_index] >= static_cast(src_node_perm.size())) { + return false; + } + if (static_cast(src_index) != src_node_perm[dst_node_perm[src_index]]) { + return false; + } + } return true; } @@ -139,7 +226,18 @@ Status TransOpSymmetryEliminationPass::EliminateTransOp(NodePtr &src_node, const GELOGE(FAILED, "Copy control edge from %s to %s failed.", src_node->GetName().c_str(), dst_node->GetName().c_str()); return ret; } - // 4.IsolateAndDelete T2, A will link to B automatically, and all control edge will also relink. + // 4.Add control edge from T1 other input to T2, like reshape second input + for (const auto &in_node : src_node->GetInDataNodes()) { + if (in_node->GetName() == pre_normal_node->GetName()) { + continue; + } + ret = GraphUtils::AddEdge(in_node->GetOutControlAnchor(), dst_node->GetInControlAnchor()); + if (ret != GRAPH_SUCCESS) { + GELOGE(FAILED, "Add control edge from %s to %s failed.", in_node->GetName().c_str(), dst_node->GetName().c_str()); + return ret; + } + } + // 5.IsolateAndDelete T2, A will link to B automatically, and all control edge will also relink. ret = IsolateAndDeleteNode(dst_node, {0}); if (ret != GRAPH_SUCCESS) { GELOGE(INTERNAL_ERROR, "Isolate removed node: %s, type: %s failed", dst_node->GetName().c_str(), @@ -147,16 +245,16 @@ Status TransOpSymmetryEliminationPass::EliminateTransOp(NodePtr &src_node, const return ret; } GELOGI("Trans op symmetry eliminate successfully. 
Node %s has been removed.", dst_node->GetName().c_str()); - // 5.If T1 has no data out, isolate and deleted it. + // 6.If T1 has no data out, isolate and deleted it. if (src_node->GetOutDataNodesSize() == 0) { - // 5.1 Copy out control to pre normal node + // 6.1 Copy out control to pre normal node ret = GraphUtils::CopyOutCtrlEdges(src_node, pre_normal_node); if (ret != GRAPH_SUCCESS) { GELOGE(FAILED, "Copy control edge from %s to %s failed.", src_node->GetName().c_str(), dst_node->GetName().c_str()); return ret; } - // 5.2 Isolate and delete T1 + // 6.2 Isolate and delete T1 ret = IsolateAndDeleteNode(src_node, {}); if (ret != GRAPH_SUCCESS) { GELOGE(INTERNAL_ERROR, "Isolate removed node: %s, type: %s failed", src_node->GetName().c_str(), diff --git a/src/ge/graph/passes/transop_symmetry_elimination_pass.h b/src/ge/graph/passes/transop_symmetry_elimination_pass.h index b0cff0c9..7f7409b7 100644 --- a/src/ge/graph/passes/transop_symmetry_elimination_pass.h +++ b/src/ge/graph/passes/transop_symmetry_elimination_pass.h @@ -44,6 +44,21 @@ class TransOpSymmetryEliminationPass : public BaseNodePass { static bool DescAreSymmetry(const NodePtr &src_node, const NodePtr &dst_node); /// + /// get the number of unknown shape of node + /// @param node_desc: node to be checked + /// @return 0 , is not dynamic shape; UNKNOWN_DIM_NUM , all dims are unknown; n , n > 0 , has n dims unknown + /// + static int GetUnknownDimsNum(const GeTensorDesc &node_desc); + + /// + /// judge after two transposed op transform the raw data will be the same + /// @param src_node: first transposed op + /// @param dst_node: second transposed op + /// @return True or False, same or not + /// + static bool JudgeTransposeDBack2Raw(const NodePtr &src_node, const NodePtr &dst_node); + + /// /// two transform nodes can not be offset if there is precision loss, like FP32->BOOL BOOL->FP32. /// keep this pair of transform nodes if it has precision loss. 
/// @param src_node: the front node diff --git a/src/ge/graph/passes/transpose_transdata_pass.cc b/src/ge/graph/passes/transpose_transdata_pass.cc index 7ac7b7a3..3ac6dea5 100644 --- a/src/ge/graph/passes/transpose_transdata_pass.cc +++ b/src/ge/graph/passes/transpose_transdata_pass.cc @@ -135,7 +135,7 @@ Status TransposeTransDataPass::RemoveTranspose(NodePtr &node) { GE_CHECK_NOTNULL(anchor); anchor->UnlinkAll(); } - AddNodeDeleted(node.get()); + AddNodeDeleted(node); if (GraphUtils::RemoveNodeWithoutRelink(graph, node) != GRAPH_SUCCESS) { GELOGE(FAILED, "[%s] RemoveNodeWithoutRelink failed.", node->GetName().c_str()); return FAILED; diff --git a/src/ge/graph/passes/var_is_initialized_op_pass.cc b/src/ge/graph/passes/var_is_initialized_op_pass.cc index c88db80c..73456a7b 100644 --- a/src/ge/graph/passes/var_is_initialized_op_pass.cc +++ b/src/ge/graph/passes/var_is_initialized_op_pass.cc @@ -191,7 +191,7 @@ Status VarIsInitializedOpPass::ChangeNodeToConstant(NodePtr &node, bool inited) AddRePassNodesWithInOut(const_node); // delete VarIsInitializedOp node from the graph - AddNodeDeleted(node.get()); + AddNodeDeleted(node); return SUCCESS; } diff --git a/src/ge/graph/passes/variable_ref_delete_op_pass.cc b/src/ge/graph/passes/variable_ref_delete_op_pass.cc index 32236814..3487df47 100644 --- a/src/ge/graph/passes/variable_ref_delete_op_pass.cc +++ b/src/ge/graph/passes/variable_ref_delete_op_pass.cc @@ -20,6 +20,12 @@ namespace ge { Status VariableRefDeleteOpPass::Run(ge::ComputeGraphPtr graph) { GE_CHECK_NOTNULL(graph); + std::set all_var_names; + auto root_graph = GraphUtils::FindRootGraph(graph); + GE_CHECK_NOTNULL(root_graph); + for (const auto &n : root_graph->GetAllNodes()) { + all_var_names.insert(n->GetName()); + } for (auto &node : graph->GetDirectNode()) { GE_CHECK_NOTNULL(node->GetOpDesc()); std::string ref_var_src_var_name; @@ -28,6 +34,11 @@ Status VariableRefDeleteOpPass::Run(ge::ComputeGraphPtr graph) { if (!is_variable_ref) { continue; } + if 
(all_var_names.count(ref_var_src_var_name) == 0) { + GELOGE(FAILED, "Can not find source variable[%s] of variable ref[%s]", ref_var_src_var_name.c_str(), + node->GetName().c_str()); + return FAILED; + } Status ret = DealVariableRef(graph, node, ref_var_src_var_name); if (ret != SUCCESS) { GELOGE(ret, "variable ref [%s] delete failed", node->GetName().c_str()); @@ -56,20 +67,12 @@ Status VariableRefDeleteOpPass::DealVariableRef(ge::ComputeGraphPtr &graph, ge:: // add attr [REF_VAR_SRC_VAR_NAME] to the previous op output desc of the variable_ref auto op_desc = peer_node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); - auto out_desc = op_desc->GetOutputDesc(static_cast(index)); + auto out_desc = op_desc->MutableOutputDesc(static_cast(index)); bool is_set_str = ge::AttrUtils::SetStr(out_desc, REF_VAR_SRC_VAR_NAME, ref_var_src_var_name); - (void)op_desc->UpdateOutputDesc(static_cast(index), out_desc); - ge::NodePtr ref_var_src_var = GraphUtils::FindNodeFromAllNodes(graph, ref_var_src_var_name); - if (ref_var_src_var == nullptr) { - GELOGE(FAILED, "Can not find source variable[%s] of variable ref[%s]", ref_var_src_var_name.c_str(), - variable_ref->GetName().c_str()); - return FAILED; - } if (is_set_str) { GELOGI("[%s-%d]: add attr [REF_VAR_SRC_VAR_NAME: %s ] ", peer_node->GetName().c_str(), index, ref_var_src_var_name.c_str()); } - // remove variable_ref if (GraphUtils::IsolateNode(variable_ref, {0}) != GRAPH_SUCCESS) { GELOGE(INTERNAL_ERROR, "Isolate removed node: %s, type: %s failed", variable_ref->GetName().c_str(), diff --git a/src/ge/graph/preprocess/graph_preprocess.cc b/src/ge/graph/preprocess/graph_preprocess.cc index 94818698..09882eb1 100644 --- a/src/ge/graph/preprocess/graph_preprocess.cc +++ b/src/ge/graph/preprocess/graph_preprocess.cc @@ -455,135 +455,6 @@ VarNamesToRefs CollectVarNamesToRefs(const ComputeGraphPtr &graph) { } return names_to_refs; } -Status AddTransNodeBetweenTwoNodes(OutDataAnchorPtr &src_out, InDataAnchorPtr &insert_in, - OutDataAnchorPtr 
&insert_out) { - if ((src_out == nullptr) || (insert_in == nullptr) || (insert_out == nullptr)) { - GELOGE(INTERNAL_ERROR, "anchor is nullptr"); - return FAILED; - } - auto vistor = src_out->GetPeerInDataAnchors(); - for (auto it = vistor.begin(); it != vistor.end(); ++it) { - InDataAnchorPtr dst_in = *it; - GE_CHK_STATUS_RET(src_out->Unlink(dst_in), "Unlink the anchor failed"); - GE_CHK_STATUS_RET(insert_out->LinkTo(dst_in), "Link the anchor failed"); - } - GE_CHK_STATUS_RET(src_out->LinkTo(insert_in), "Link the anchor failed"); - return SUCCESS; -} - -NodePtr CreateCastOp(const ge::GeShape &shape, const ge::DataType input_data_type, const ge::DataType output_data_type, - const ge::Format format, NodePtr &node) { - static uint32_t transop_count = 0; - std::string name = std::string("cast_node").append(std::to_string(transop_count++)); - - GELOGI("create cast op:%s, input datatype:%s, out datatype:%s.", name.c_str(), - TypeUtils::DataTypeToSerialString(input_data_type).c_str(), - TypeUtils::DataTypeToSerialString(output_data_type).c_str()); - GeTensorDesc input(shape, format, input_data_type); - input.SetOriginFormat(format); - input.SetOriginShape(shape); - input.SetOriginDataType(input_data_type); - ge::TensorUtils::SetRealDimCnt(input, static_cast(shape.GetDims().size())); - - GeTensorDesc output(shape, format, output_data_type); - output.SetOriginFormat(format); - output.SetOriginShape(shape); - output.SetOriginDataType(output_data_type); - ge::TensorUtils::SetRealDimCnt(output, static_cast(shape.GetDims().size())); - - auto cast_node = CreateTransNode(name, CAST, input, output, node); - GELOGD("Create cast node success."); - return cast_node; -} - -Status ProcessInputFP16(NodePtr &node_ptr, bool &is_dynamic_batch, NodePtr &switchn_node) { - GE_CHECK_NOTNULL(node_ptr); - auto op_desc = node_ptr->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - const GeTensorDescPtr &input = op_desc->MutableInputDesc(0); - GE_CHECK_NOTNULL(input); - ge::DataType src_dtype = 
input->GetDataType(); - if (src_dtype == DT_FLOAT16) { - GELOGI("The node name, %s dtype is fp16", node_ptr->GetName().c_str()); - return SUCCESS; - } - input->SetDataType(DT_FLOAT16); - input->SetOriginDataType(DT_FLOAT16); - int64_t input_shape_size = 0; - int64_t output_shape_size = 0; - ge::graphStatus input_graph_status = ge::TensorUtils::GetTensorSizeInBytes(*input, input_shape_size); - ge::graphStatus output_graph_status = ge::TensorUtils::GetTensorMemorySizeInBytes(*input, output_shape_size); - if (input_graph_status != ge::GRAPH_SUCCESS && output_graph_status != ge::GRAPH_SUCCESS) { - GELOGE(GRAPH_FAILED, "GetTensorSize failed!"); - return FAILED; - } - ge::TensorUtils::SetSize(*input, input_shape_size); - const GeTensorDescPtr &output = op_desc->MutableOutputDesc(0); - GE_CHECK_NOTNULL(output); - output->SetDataType(DT_FLOAT16); - output->SetOriginDataType(DT_FLOAT16); - ge::TensorUtils::SetSize(*output, output_shape_size); - - if (!is_dynamic_batch) { - NodePtr cast_node = CreateCastOp(output->GetShape(), DT_FLOAT16, src_dtype, output->GetFormat(), node_ptr); - GE_CHECK_NOTNULL(cast_node); - OutDataAnchorPtr src_out = node_ptr->GetOutDataAnchor(0); - InDataAnchorPtr cast_in = cast_node->GetInDataAnchor(0); - OutDataAnchorPtr cast_out = cast_node->GetOutDataAnchor(0); - if (AddTransNodeBetweenTwoNodes(src_out, cast_in, cast_out) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "add node between two nodes failed, src name:%s, cast node name:%s.", - node_ptr->GetName().c_str(), cast_node->GetName().c_str()); - return FAILED; - } - } else { - auto switchn_op_desc = switchn_node->GetOpDesc(); - GE_CHECK_NOTNULL(switchn_op_desc); - const GeTensorDescPtr &switchn_input = switchn_op_desc->MutableInputDesc(0); - GE_CHECK_NOTNULL(switchn_input); - switchn_input->SetDataType(DT_FLOAT16); - switchn_input->SetOriginDataType(DT_FLOAT16); - for (uint32_t i = 0; i < switchn_node->GetAllOutDataAnchorsSize(); ++i) { - const GeTensorDescPtr &switchn_output = 
switchn_op_desc->MutableOutputDesc(i); - GE_CHECK_NOTNULL(switchn_output); - switchn_output->SetDataType(DT_FLOAT16); - switchn_output->SetOriginDataType(DT_FLOAT16); - NodePtr cast_node = - CreateCastOp(switchn_output->GetShape(), DT_FLOAT16, src_dtype, switchn_output->GetFormat(), node_ptr); - GE_CHECK_NOTNULL(cast_node); - OutDataAnchorPtr src_out = switchn_node->GetOutDataAnchor(i); - InDataAnchorPtr cast_in = cast_node->GetInDataAnchor(0); - OutDataAnchorPtr cast_out = cast_node->GetOutDataAnchor(0); - if (AddTransNodeBetweenTwoNodes(src_out, cast_in, cast_out) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "add node between two nodes failed, src name:%s, cast node name:%s.", - switchn_node->GetName().c_str(), cast_node->GetName().c_str()); - return FAILED; - } - } - } - return SUCCESS; -} - -NodePtr CreateTransdataNode(const ge::GeShape &in_shape, const ge::Format input_format, const ge::GeShape &out_shape, - const ge::Format output_format, const ge::DataType dt, NodePtr &node) { - static uint32_t transop_count = 0; - // Does not involve multithreading. 
- std::string name = std::string("transdata_node").append(std::to_string(transop_count++)); - - GELOGI("create trandata op:%s, input format:%s, out format:%s.", name.c_str(), - TypeUtils::FormatToSerialString(input_format).c_str(), TypeUtils::FormatToSerialString(output_format).c_str()); - - GeTensorDesc input(in_shape, input_format, dt); - input.SetOriginFormat(input_format); - input.SetOriginShape(in_shape); - input.SetOriginDataType(dt); - - GeTensorDesc output(out_shape, output_format, dt); - output.SetOriginFormat(output_format); - output.SetOriginShape(out_shape); - output.SetOriginDataType(dt); - - return CreateTransNode(name, TRANSDATA, input, output, node); -} Status TransferShape2NC1HWC0(Format src_format, const std::vector &src_shape, DataType dt, Format dst_format, std::vector &dst_shape) { @@ -652,6 +523,7 @@ Status ModifyFormatAndShapeForSingleTensor(const GeTensorDescPtr &input_output) input_output->SetShape(ge::GeShape(dst_shape_dims)); return SUCCESS; } + Status ModifyDataNetOutputFormatAndShape(OpDescPtr &op_desc, uint32_t index, Format storage_format, vector &dst_shape_dims) { GE_CHECK_NOTNULL(op_desc); @@ -668,85 +540,24 @@ Status ModifyDataNetOutputFormatAndShape(OpDescPtr &op_desc, uint32_t index, For output->SetShape(ge::GeShape(dst_shape_dims)); output->SetFormat(storage_format); - int64_t size = 0; - graphStatus graph_status = TensorUtils::GetTensorMemorySizeInBytes(*output, size); - if (graph_status != ge::GRAPH_SUCCESS) { - GELOGE(graph_status, "GetTensorSizeInBytes failed!"); - return FAILED; - } - ge::TensorUtils::SetSize(*input, size); - ge::TensorUtils::SetSize(*output, size); - - GELOGI( - "Modify Data NetOutput format and shape success, node:%s, index:%d, old_shape:%s, old_Format:%s, " - "new_shape:%s, new_format:%s, new_size:%u", - op_desc->GetName().c_str(), index, formats::JoinToString(old_shape).c_str(), - ge::TypeUtils::FormatToSerialString(old_format).c_str(), formats::JoinToString(dst_shape_dims).c_str(), - 
ge::TypeUtils::FormatToSerialString(storage_format).c_str(), size); - return SUCCESS; -} -Status ProcessInputNC1HWC0(NodePtr &node_ptr, bool &is_dynamic_batch, NodePtr &switchn_node) { - GE_CHECK_NOTNULL(node_ptr); - auto op_desc = node_ptr->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - const GeTensorDescPtr &input = op_desc->MutableInputDesc(0); - GE_CHECK_NOTNULL(input); - ge::Format old_format = input->GetFormat(); - ge::GeShape old_shape = input->GetShape(); - bool support = ((old_format == FORMAT_NC1HWC0) || (old_format == FORMAT_NCHW) || (old_format == FORMAT_NHWC)); - if (!support) { - GELOGE(INTERNAL_ERROR, "The format [%s] is unsupported", TypeUtils::FormatToSerialString(old_format).c_str()); - return FAILED; - } - if (old_format == FORMAT_NC1HWC0) { - GELOGI("No need to transfer format"); - return SUCCESS; - } - if (ModifyInputFormatAndShape(node_ptr) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "modify format and shape failed"); - return FAILED; - } - if (!is_dynamic_batch) { - NodePtr trans_node = - CreateTransdataNode(input->GetShape(), FORMAT_NC1HWC0, old_shape, old_format, input->GetDataType(), node_ptr); - GE_CHECK_NOTNULL(trans_node); - OutDataAnchorPtr src_out = node_ptr->GetOutDataAnchor(0); - InDataAnchorPtr trans_in = trans_node->GetInDataAnchor(0); - OutDataAnchorPtr trans_out = trans_node->GetOutDataAnchor(0); - if (AddTransNodeBetweenTwoNodes(src_out, trans_in, trans_out) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "add node between two nodes failed"); + if (!output->MutableShape().IsUnknownShape()) { + int64_t size = 0; + graphStatus graph_status = TensorUtils::GetTensorMemorySizeInBytes(*output, size); + if (graph_status != ge::GRAPH_SUCCESS) { + GELOGE(graph_status, "GetTensorSizeInBytes failed!"); return FAILED; } - } else { - auto switchn_op_desc = switchn_node->GetOpDesc(); - GE_CHECK_NOTNULL(switchn_op_desc); - const GeTensorDescPtr &switchn_input = switchn_op_desc->MutableInputDesc(0); - if (ModifyFormatAndShapeForSingleTensor(switchn_input) != 
SUCCESS) { - GELOGE(INTERNAL_ERROR, "modify format and shape failed"); - return FAILED; - } - for (uint32_t i = 0; i < switchn_node->GetAllOutDataAnchorsSize(); ++i) { - const GeTensorDescPtr &switchn_output = switchn_op_desc->MutableOutputDesc(i); - GE_CHECK_NOTNULL(switchn_output); - old_format = switchn_output->GetFormat(); - old_shape = switchn_output->GetShape(); - if (ModifyFormatAndShapeForSingleTensor(switchn_output) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "modify format and shape failed"); - return FAILED; - } - NodePtr trans_node = CreateTransdataNode(switchn_output->GetShape(), FORMAT_NC1HWC0, old_shape, old_format, - switchn_output->GetDataType(), node_ptr); - GE_CHECK_NOTNULL(trans_node); - OutDataAnchorPtr src_out = switchn_node->GetOutDataAnchor(i); - InDataAnchorPtr cast_in = trans_node->GetInDataAnchor(0); - OutDataAnchorPtr cast_out = trans_node->GetOutDataAnchor(0); - if (AddTransNodeBetweenTwoNodes(src_out, cast_in, cast_out) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "add node between two nodes failed, src name:%s, cast node name:%s.", - switchn_node->GetName().c_str(), trans_node->GetName().c_str()); - return FAILED; - } - } + ge::TensorUtils::SetSize(*input, size); + ge::TensorUtils::SetSize(*output, size); + + GELOGI( + "Modify Data NetOutput format and shape success, node:%s, index:%d, old_shape:%s, old_Format:%s, " + "new_shape:%s, new_format:%s, new_size:%lu", + op_desc->GetName().c_str(), index, formats::JoinToString(old_shape).c_str(), + ge::TypeUtils::FormatToSerialString(old_format).c_str(), formats::JoinToString(dst_shape_dims).c_str(), + ge::TypeUtils::FormatToSerialString(storage_format).c_str(), size); } + return SUCCESS; } @@ -775,44 +586,6 @@ Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, Node return SUCCESS; } -Status ProcessDataNode(NodePtr &node_ptr) { - bool set_fp16 = false; - if (!ge::AttrUtils::GetBool(node_ptr->GetOpDesc(), "input_fp16", set_fp16) || !set_fp16) { - return SUCCESS; - } - for (auto 
const &next_node : node_ptr->GetOutNodes()) { - if (next_node->GetType() == AIPP) { - GELOGE(INTERNAL_ERROR, - "This input node [%s] is linked to aipp, can not be set to fp16," - "please check your atc parma insert_op_conf, input_fp16_nodes.", - node_ptr->GetName().c_str()); - return FAILED; - } - } - GELOGI("input_fp16 is found, the node name is %s.", node_ptr->GetName().c_str()); - bool is_dynamic_batch = false; - NodePtr switchn_node = nullptr; - if (CheckIfDynamicBatchScene(node_ptr, is_dynamic_batch, switchn_node)) { - GELOGE(INTERNAL_ERROR, "CheckIfDynamicBatchScene failed"); - return FAILED; - } - if (ProcessInputFP16(node_ptr, is_dynamic_batch, switchn_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "ProcessInputFP16 failed"); - return FAILED; - } - // check if need to set format - bool set_format = false; - if (!ge::AttrUtils::GetBool(node_ptr->GetOpDesc(), "input_set_nc1hwc0", set_format) || !set_format) { - return SUCCESS; - } - GELOGI("The format of node [%s] should be set NC1HWC0.", node_ptr->GetName().c_str()); - if (ProcessInputNC1HWC0(node_ptr, is_dynamic_batch, switchn_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "ProcessInputNC1HWC0 failed"); - return FAILED; - } - return SUCCESS; -} - bool CheckIfSetOutputType(std::string &output_type, ge::DataType &output_data_type) { if (output_type_str_to_datatype.find(output_type) != output_type_str_to_datatype.end()) { output_data_type = output_type_str_to_datatype[output_type]; @@ -830,221 +603,6 @@ bool CheckOpType(const NodePtr &node, const std::string type) { return false; } -Status ProcessFp16Nc1hwc0Dynamic(const OpDescPtr &src_op_desc, NodePtr &node) { - auto merge_out = src_op_desc->MutableOutputDesc(0); - GE_CHECK_NOTNULL(merge_out); - if (ModifyFormatAndShapeForSingleTensor(merge_out) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "modify format and shape failed"); - return FAILED; - } - for (uint32_t i = 0; i < node->GetAllInDataAnchorsSize(); ++i) { - auto merge_in = src_op_desc->MutableInputDesc(i); - 
GE_CHECK_NOTNULL(merge_in); - ge::Format old_format = merge_in->GetFormat(); - ge::GeShape old_shape = merge_in->GetShape(); - if (ModifyFormatAndShapeForSingleTensor(merge_in) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "modify format and shape failed"); - return FAILED; - } - ge::GeShape new_shape = merge_in->GetShape(); - NodePtr trans_node = CreateTransdataNode(old_shape, old_format, new_shape, FORMAT_NC1HWC0, DT_FLOAT16, node); - GE_CHECK_NOTNULL(trans_node); - const InDataAnchorPtr &dst_in_anchor = node->GetInDataAnchor(i); - GE_CHECK_NOTNULL(dst_in_anchor); - const OutDataAnchorPtr &src_out_anchor = dst_in_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(src_out_anchor); - if (GraphUtils::InsertNodeBetweenDataAnchors(src_out_anchor, dst_in_anchor, trans_node) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "InsertNodeBetweenDataAnchors failed"); - return FAILED; - } - } - return SUCCESS; -} - -Status ProcessNetoutputNodeFp16Nc1hwc0(GeTensorDesc &src_desc, const InDataAnchorPtr &in_anchor, - GeTensorDescPtr &net_output_input_desc, NodePtr &node) { - bool is_dynamic = CheckOpType(node, MERGE); - auto src_op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(src_op_desc); - ge::GeShape src_shape = src_desc.GetShape(); - ge::Format src_format = src_desc.GetFormat(); - ge::DataType src_dtype = src_desc.GetDataType(); - if (src_dtype != DT_FLOAT16) { - if (!is_dynamic) { - auto peer_out = in_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out); - NodePtr cast_node = CreateCastOp(src_shape, src_dtype, DT_FLOAT16, src_format, node); - GE_CHECK_NOTNULL(cast_node); - if (GraphUtils::InsertNodeBetweenDataAnchors(peer_out, in_anchor, cast_node) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "InsertNodeBetweenDataAnchors failed"); - return FAILED; - } - } else { - // Update outputdesc - const GeTensorDescPtr &merge_output = src_op_desc->MutableOutputDesc(0); - GE_CHECK_NOTNULL(merge_output); - merge_output->SetDataType(DT_FLOAT16); - merge_output->SetOriginDataType(DT_FLOAT16); - // 
Update input - for (uint32_t i = 0; i < node->GetAllInDataAnchorsSize(); ++i) { - const GeTensorDescPtr &merge_input = src_op_desc->MutableInputDesc(i); - GE_CHECK_NOTNULL(merge_input); - src_shape = merge_input->GetShape(); - src_format = merge_input->GetFormat(); - src_dtype = merge_input->GetDataType(); - merge_input->SetDataType(DT_FLOAT16); - merge_input->SetOriginDataType(DT_FLOAT16); - const InDataAnchorPtr &dst_in_anchor = node->GetInDataAnchor(i); - const OutDataAnchorPtr &src_out_anchor = dst_in_anchor->GetPeerOutAnchor(); - NodePtr cast_node = CreateCastOp(src_shape, src_dtype, DT_FLOAT16, src_format, node); - if (GraphUtils::InsertNodeBetweenDataAnchors(src_out_anchor, dst_in_anchor, cast_node) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "InsertNodeBetweenDataAnchors failed"); - return FAILED; - } - } - } - net_output_input_desc->SetDataType(DT_FLOAT16); - net_output_input_desc->SetOriginDataType(DT_FLOAT16); - } - if (src_format == FORMAT_NC1HWC0) { - GELOGI("Format is NC1HWC0, no need to transfer"); - return SUCCESS; - } - std::vector dst_shape_dims; - std::vector src_shape_dims = src_shape.GetDims(); - if (TransferShape2NC1HWC0(src_format, src_shape_dims, DT_FLOAT16, FORMAT_NC1HWC0, dst_shape_dims) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Trans shape failed"); - return FAILED; - } - ge::GeShape dst_shape(dst_shape_dims); - net_output_input_desc->SetFormat(FORMAT_NC1HWC0); - net_output_input_desc->SetOriginFormat(FORMAT_NC1HWC0); - net_output_input_desc->SetShape(dst_shape); - net_output_input_desc->SetOriginShape(dst_shape); - if (!is_dynamic) { - NodePtr trans_node = CreateTransdataNode(src_shape, src_format, dst_shape, FORMAT_NC1HWC0, DT_FLOAT16, node); - GE_CHECK_NOTNULL(trans_node); - auto peer_out_new = in_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out_new); - if (GraphUtils::InsertNodeBetweenDataAnchors(peer_out_new, in_anchor, trans_node) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "InsertNodeBetweenDataAnchors failed"); - return 
FAILED; - } - } else { - if (ProcessFp16Nc1hwc0Dynamic(src_op_desc, node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "ProcessFp16Nc1hwc0Dynamic failed"); - return FAILED; - } - } - return SUCCESS; -} - -Status ProcessOutputDynamic(const NodePtr &src_node, NodePtr &node, ge::DataType &output_data_type) { - OpDescPtr src_op_desc = src_node->GetOpDesc(); - const GeTensorDescPtr &merge_output = src_op_desc->MutableOutputDesc(0); - GE_CHECK_NOTNULL(merge_output); - merge_output->SetDataType(output_data_type); - merge_output->SetOriginDataType(output_data_type); - // Update input - for (uint32_t i = 0; i < src_node->GetAllInDataAnchorsSize(); ++i) { - const GeTensorDescPtr &merge_input = src_op_desc->MutableInputDesc(i); - GE_CHECK_NOTNULL(merge_input); - ge::GeShape src_shape = merge_input->GetShape(); - ge::Format src_format = merge_input->GetFormat(); - ge::DataType src_dtype = merge_input->GetDataType(); - merge_input->SetDataType(output_data_type); - merge_input->SetOriginDataType(output_data_type); - const InDataAnchorPtr &dst_in_anchor = src_node->GetInDataAnchor(i); - GE_CHECK_NOTNULL(dst_in_anchor); - const OutDataAnchorPtr &src_out_anchor = dst_in_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(src_out_anchor); - NodePtr cast_node = CreateCastOp(src_shape, src_dtype, output_data_type, src_format, node); - if (GraphUtils::InsertNodeBetweenDataAnchors(src_out_anchor, dst_in_anchor, cast_node) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "InsertNodeBetweenDataAnchors failed"); - return FAILED; - } - } - return SUCCESS; -} - -Status ProcessNetoutputNode(NodePtr &node, std::string &output_type) { - auto op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - ge::DataType output_data_type = ge::DT_FLOAT; - bool is_set_output_type = CheckIfSetOutputType(output_type, output_data_type); - - for (const auto &in_anchor : node->GetAllInDataAnchors()) { - auto index = static_cast(in_anchor->GetIdx()); - auto peer_out = in_anchor->GetPeerOutAnchor(); - 
GE_CHECK_NOTNULL(peer_out); - auto src_index = static_cast(peer_out->GetIdx()); - auto src_node = peer_out->GetOwnerNode(); - GE_CHECK_NOTNULL(src_node); - bool is_dynamic = CheckOpType(src_node, MERGE); - - OpDescPtr src_op_desc = src_node->GetOpDesc(); - GE_CHECK_NOTNULL(src_op_desc); - auto net_output_input_desc = op_desc->MutableInputDesc(index); - GE_CHECK_NOTNULL(net_output_input_desc); - - ge::GeShape src_shape = src_op_desc->GetOutputDesc(src_index).GetShape(); - ge::Format src_format = src_op_desc->GetOutputDesc(src_index).GetFormat(); - ge::DataType src_dtype = src_op_desc->GetOutputDesc(src_index).GetDataType(); - // Update datatype - if (is_set_output_type) { - GELOGI("Enter into process output_type schedule"); - if (src_dtype == output_data_type) { - GELOGI("Data type is same ,no need to transfer."); - continue; - } - if (!is_dynamic) { - NodePtr cast_node = CreateCastOp(src_shape, src_dtype, output_data_type, src_format, node); - if (GraphUtils::InsertNodeBetweenDataAnchors(peer_out, in_anchor, cast_node) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "InsertNodeBetweenDataAnchors failed"); - return FAILED; - } - } else { - // Update outputdesc - if (ProcessOutputDynamic(src_node, node, output_data_type) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "ProcessOutputDynamic failed"); - return FAILED; - } - } - net_output_input_desc->SetDataType(output_data_type); - net_output_input_desc->SetOriginDataType(output_data_type); - continue; - } - // output_node is not set,check if is_output_adjust_hw_layout is set - bool set_fp16_nc1hwc0 = false; - if (!is_dynamic) { - (void)AttrUtils::GetBool(src_op_desc, "output_set_fp16_nc1hwc0", set_fp16_nc1hwc0); - } else { - // need check dynamic scene, graph structure: node->merge->netoutput - const InDataAnchorPtr &merge_input_anchor = src_node->GetInDataAnchor(0); - GE_CHECK_NOTNULL(merge_input_anchor); - const OutDataAnchorPtr &src_out_anchor = merge_input_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(src_out_anchor); - 
auto src_merge_node = src_out_anchor->GetOwnerNode(); - GE_CHECK_NOTNULL(src_merge_node); - auto src_merge_node_opdesc = src_merge_node->GetOpDesc(); - (void)AttrUtils::GetBool(src_merge_node_opdesc, "output_set_fp16_nc1hwc0", set_fp16_nc1hwc0); - } - if (set_fp16_nc1hwc0) { - GELOGI("Node [%s] should be set FP16 and NC1HWC0", src_op_desc->GetName().c_str()); - if ((src_format != FORMAT_NCHW) && (src_format != FORMAT_NHWC) && (src_format != FORMAT_NC1HWC0)) { - GELOGE(INTERNAL_ERROR, "Format is not one of NCHW, NHWC, NC1HWC0."); - return FAILED; - } - GeTensorDesc src_desc(src_shape, src_format, src_dtype); - if (ProcessNetoutputNodeFp16Nc1hwc0(src_desc, in_anchor, net_output_input_desc, src_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Process netoutput fp16 nc1hwc0."); - return FAILED; - } - } - } - return SUCCESS; -} - Status CheckIfNeedSetNdFormat(const NodePtr &node_ptr) { auto op = node_ptr->GetOpDesc(); GE_CHECK_NOTNULL(op); @@ -1796,35 +1354,6 @@ Status GraphPrepare::ResourcePairProcess(const std::string &action) { return SUCCESS; } -Status GraphPrepare::OptimizeAfterInfershapeByAtcParams() { - if (options_.train_graph_flag) { - GELOGI("This is train mode, no need to do this schedule."); - return SUCCESS; - } - GE_RETURN_IF_ERROR(InsertNewOpUtil::Instance().UpdateDataNodeByAipp(compute_graph_)); - for (auto &node_ptr : compute_graph_->GetDirectNode()) { - GE_CHECK_NOTNULL(node_ptr); - if (CheckIfNeedSetNdFormat(node_ptr) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Set node [%s] format ND failed", node_ptr->GetName().c_str()); - return FAILED; - } - if (node_ptr->GetType() == DATA) { - if (ProcessDataNode(node_ptr) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Process data node failed"); - return FAILED; - } - } - - if (node_ptr->GetType() == ge::NETOUTPUT) { - if (ProcessNetoutputNode(node_ptr, options_.output_datatype) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Process netoutput node failed"); - return FAILED; - } - } - } - return SUCCESS; -} - Status 
GraphPrepare::UpdateDataNetOutputByStorageFormat() { for (auto &node_ptr : compute_graph_->GetAllNodes()) { GE_CHECK_NOTNULL(node_ptr); @@ -2011,12 +1540,6 @@ Status GraphPrepare::Preprocess(const std::vector &user_input) { ProcessCCEFormat(); - ret = OptimizeAfterInfershapeByAtcParams(); - if (ret != SUCCESS) { - GELOGE(ret, "Optimize for input if set inputfp16 failed."); - return ret; - } - SaveOriginalGraphToOmModel(); GE_TIMESTAMP_START(OptimizeForPreprocess); @@ -2073,7 +1596,6 @@ Status GraphPrepare::PrepareDynShape(ConstGraphPtr graph, const std::vectoraipp_mode() != domi::AippOpParams::undefined, PARAM_INVALID, - "when insert AIPP op, aipp_mode must be configured as static or dynamic "); - - GE_CHK_BOOL_RET_STATUS(aipp_params_->var_reci_chn_0_size() <= 1, PARAM_INVALID, - "The parameter var_reci_chn_0 can not be configed repeatedly"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->var_reci_chn_1_size() <= 1, PARAM_INVALID, - "The parameter var_reci_chn_1 can not be configed repeatedly"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->var_reci_chn_2_size() <= 1, PARAM_INVALID, - "The parameter var_reci_chn_2 can not be configed repeatedly"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->var_reci_chn_3_size() <= 1, PARAM_INVALID, - "The parameter var_reci_chn_3 can not be configed repeatedly"); - - GE_CHK_BOOL_RET_STATUS(aipp_params_->matrix_r0c0_size() <= 1, PARAM_INVALID, - "The parameter matrix_r0c0 can not be configed repeatedly"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->matrix_r0c1_size() <= 1, PARAM_INVALID, - "The parameter matrix_r0c1 can not be configed repeatedly"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->matrix_r0c2_size() <= 1, PARAM_INVALID, - "The parameter matrix_r0c2 can not be configed repeatedly"); - - GE_CHK_BOOL_RET_STATUS(aipp_params_->matrix_r1c0_size() <= 1, PARAM_INVALID, - "The parameter matrix_r1c0 can not be configed repeatedly"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->matrix_r1c1_size() <= 1, PARAM_INVALID, - "The parameter matrix_r1c1 can not be configed 
repeatedly"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->matrix_r1c2_size() <= 1, PARAM_INVALID, - "The parameter matrix_r1c2 can not be configed repeatedly"); - - GE_CHK_BOOL_RET_STATUS(aipp_params_->matrix_r2c0_size() <= 1, PARAM_INVALID, - "The parameter matrix_r2c0 can not be configed repeatedly"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->matrix_r2c1_size() <= 1, PARAM_INVALID, - "The parameter matrix_r2c1 can not be configed repeatedly"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->matrix_r2c2_size() <= 1, PARAM_INVALID, - "The parameter matrix_r2c2 can not be configed repeatedly"); - - GE_CHK_BOOL_RET_STATUS(aipp_params_->output_bias_0_size() <= 1, PARAM_INVALID, - "The parameter output_bias_0 can not be configed repeatedly"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->output_bias_1_size() <= 1, PARAM_INVALID, - "The parameter output_bias_1 can not be configed repeatedly"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->output_bias_2_size() <= 1, PARAM_INVALID, - "The parameter output_bias_2 can not be configed repeatedly"); - - GE_CHK_BOOL_RET_STATUS(aipp_params_->input_bias_0_size() <= 1, PARAM_INVALID, - "The parameter input_bias_0 can not be configed repeatedly"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->input_bias_1_size() <= 1, PARAM_INVALID, - "The parameter input_bias_1 can not be configed repeatedly"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->input_bias_2_size() <= 1, PARAM_INVALID, - "The parameter input_bias_2 can not be configed repeatedly"); - - GE_CHK_BOOL_RET_STATUS(aipp_params_->input_edge_idx_size() <= 1, PARAM_INVALID, - "The parameter input_edge_idx can not be configed repeatedly"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->aipp_mode() != domi::AippOpParams::undefined, PARAM_INVALID, + "When insert AIPP op, aipp_mode must be configured as static or dynamic "); + + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_0_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_0 can not be configed repeatedly"); + 
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_1_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_1 can not be configed repeatedly"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_2_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_2 can not be configed repeatedly"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_3_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_3 can not be configed repeatedly"); + + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r0c0_size() <= 1, PARAM_INVALID, + "The parameter matrix_r0c0 can not be configed repeatedly"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r0c1_size() <= 1, PARAM_INVALID, + "The parameter matrix_r0c1 can not be configed repeatedly"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r0c2_size() <= 1, PARAM_INVALID, + "The parameter matrix_r0c2 can not be configed repeatedly"); + + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r1c0_size() <= 1, PARAM_INVALID, + "The parameter matrix_r1c0 can not be configed repeatedly"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r1c1_size() <= 1, PARAM_INVALID, + "The parameter matrix_r1c1 can not be configed repeatedly"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r1c2_size() <= 1, PARAM_INVALID, + "The parameter matrix_r1c2 can not be configed repeatedly"); + + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r2c0_size() <= 1, PARAM_INVALID, + "The parameter matrix_r2c0 can not be configed repeatedly"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r2c1_size() <= 1, PARAM_INVALID, + "The parameter matrix_r2c1 can not be configed repeatedly"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r2c2_size() <= 1, PARAM_INVALID, + "The parameter matrix_r2c2 can not be configed repeatedly"); + + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->output_bias_0_size() <= 
1, PARAM_INVALID, + "The parameter output_bias_0 can not be configed repeatedly"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->output_bias_1_size() <= 1, PARAM_INVALID, + "The parameter output_bias_1 can not be configed repeatedly"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->output_bias_2_size() <= 1, PARAM_INVALID, + "The parameter output_bias_2 can not be configed repeatedly"); + + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_bias_0_size() <= 1, PARAM_INVALID, + "The parameter input_bias_0 can not be configed repeatedly"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_bias_1_size() <= 1, PARAM_INVALID, + "The parameter input_bias_1 can not be configed repeatedly"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_bias_2_size() <= 1, PARAM_INVALID, + "The parameter input_bias_2 can not be configed repeatedly"); + + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_edge_idx_size() <= 1, PARAM_INVALID, + "The parameter input_edge_idx can not be configed repeatedly"); const domi::AippOpParams::AippMode aipp_mode = aipp_params_->aipp_mode(); if (aipp_mode == domi::AippOpParams::dynamic) { - GE_CHK_BOOL_RET_STATUS(aipp_params_->max_src_image_size() > 0, PARAM_INVALID, - "for dynamic AIPP params, max_src_image_size must greater than 0"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG( + aipp_params_->max_src_image_size() > 0, PARAM_INVALID, + "For dynamic AIPP params, max_src_image_size must be set which number should be greater than 0"); } else { - GE_CHK_BOOL_RET_STATUS(aipp_params_->input_format() != domi::AippOpParams::UNDEFINED, PARAM_INVALID, - "Input format of AIPP conf is undefined"); - - GE_CHK_BOOL_RET_STATUS(aipp_params_->src_image_size_w() >= 0, PARAM_INVALID, - "src_image_size_w must not be configed smaller than 0"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->src_image_size_h() >= 0, PARAM_INVALID, - "src_image_size_h must not be configed smaller than 0"); - 
GE_CHK_BOOL_RET_STATUS(aipp_params_->load_start_pos_w() >= 0, PARAM_INVALID, - "load_start_pos_w must not be configed smaller than 0"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->load_start_pos_h() >= 0, PARAM_INVALID, - "load_start_pos_h must not be configed smaller than 0"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->crop_size_w() >= 0, PARAM_INVALID, - "crop_size_w must not be configed smaller than 0"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->resize_output_w() >= 0, PARAM_INVALID, - "resize_output_w must not be configed smaller than 0"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->resize_output_h() >= 0, PARAM_INVALID, - "resize_output_h must not be configed smaller than 0"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->left_padding_size() >= 0, PARAM_INVALID, - "left_padding_size must not be configed smaller than 0"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->right_padding_size() >= 0, PARAM_INVALID, - "right_padding_size must not be configed smaller than 0"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->top_padding_size() >= 0, PARAM_INVALID, - "top_padding_size must not be configed smaller than 0"); - GE_CHK_BOOL_RET_STATUS(aipp_params_->bottom_padding_size() >= 0, PARAM_INVALID, - "bottom_padding_size must not be configed smaller than 0"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_format() != domi::AippOpParams::UNDEFINED, PARAM_INVALID, + "Input format of AIPP conf is undefined"); + + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->src_image_size_w() >= 0, PARAM_INVALID, + "Src_image_size_w must not be configed smaller than 0"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->src_image_size_h() >= 0, PARAM_INVALID, + "Src_image_size_h must not be configed smaller than 0"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->load_start_pos_w() >= 0, PARAM_INVALID, + "Load_start_pos_w must not be configed smaller than 0"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->load_start_pos_h() >= 0, PARAM_INVALID, + "Load_start_pos_h must not be 
configed smaller than 0"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->crop_size_w() >= 0, PARAM_INVALID, + "Crop_size_w must not be configed smaller than 0"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->resize_output_w() >= 0, PARAM_INVALID, + "Resize_output_w must not be configed smaller than 0"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->resize_output_h() >= 0, PARAM_INVALID, + "Resize_output_h must not be configed smaller than 0"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->left_padding_size() >= 0, PARAM_INVALID, + "Left_padding_size must not be configed smaller than 0"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->right_padding_size() >= 0, PARAM_INVALID, + "Right_padding_size must not be configed smaller than 0"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->top_padding_size() >= 0, PARAM_INVALID, + "Top_padding_size must not be configed smaller than 0"); + AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->bottom_padding_size() >= 0, PARAM_INVALID, + "Bottom_padding_size must not be configed smaller than 0"); } return SUCCESS; diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.cc b/src/ge/graph/preprocess/multi_batch_copy_graph.cc index d06a493d..d1e9fe62 100644 --- a/src/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/src/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -35,6 +35,9 @@ #include "graph/utils/graph_utils.h" #include "graph/utils/node_utils.h" +using std::string; +using std::vector; + namespace ge { namespace multibatch { namespace { @@ -45,6 +48,7 @@ const int kDataOutIndex = 0; const int kDataInIndex = 0; const int kMergeDataOutIndex = 0; const int kStaticOutput = -1; +const int kDecimal = 10; const size_t kMaxShapesCount = 100; const size_t kMinShapesCount = 2; @@ -209,7 +213,7 @@ Status CheckDataShape(const std::vector &nodes) { if (unknown_shape_count == 0) { ErrorManager::GetInstance().ATCReportErrMessage("E10040"); GELOGE(PARAM_INVALID, - "Need unknow 
shape data when user set --dynamic_batch_size or --dynamic_image_size, please check."); + "Need unknow shape data when user set --dynamic_batch_size, --dynamic_image_size or --dynamic_dims"); return PARAM_INVALID; } @@ -494,21 +498,21 @@ Status MultiBatchGraphCopyer::CheckArguments() { return PARAM_INVALID; } if (shapes_.size() < kMinShapesCount) { - ErrorManager::GetInstance().ATCReportErrMessage("E10035", {"shapesize", "minshapesize"}, - {std::to_string(shapes_.size()), std::to_string(kMinShapesCount)}); + ErrorManager::GetInstance().ATCReportErrMessage( + "E10035", {"shapesize", "minshapesize"}, {std::to_string(shapes_.size()), std::to_string(kMinShapesCount - 1)}); GELOGE(PARAM_INVALID, - "Input parameter[--dynamic_batch_size or --dynamic_image_size]'s " + "Input parameter[--dynamic_batch_size, --dynamic_image_size or --dynamic_dims]'s " "value size [%zu] must be greater than [%zu].", - shapes_.size(), kMinShapesCount); + shapes_.size(), kMinShapesCount - 1); return PARAM_INVALID; } if (shapes_.size() > kMaxShapesCount) { - ErrorManager::GetInstance().ATCReportErrMessage("E10036", {"shapesize", "maxshapesize"}, - {std::to_string(shapes_.size()), std::to_string(kMaxShapesCount)}); + ErrorManager::GetInstance().ATCReportErrMessage( + "E10036", {"shapesize", "maxshapesize"}, {std::to_string(shapes_.size()), std::to_string(kMaxShapesCount + 1)}); GELOGE(PARAM_INVALID, - "Input parameter[--dynamic_batch_size or --dynamic_image_size]'s " + "Input parameter[--dynamic_batch_size, --dynamic_image_size or --dynamic_dims]'s " "value size [%zu] must be less than [%zu].", - shapes_.size(), kMaxShapesCount); + shapes_.size(), kMaxShapesCount + 1); return PARAM_INVALID; } std::set> shapes_set; @@ -518,7 +522,7 @@ Status MultiBatchGraphCopyer::CheckArguments() { ErrorManager::GetInstance().ATCReportErrMessage("E10037", {"shapesize1", "shapesize2"}, {std::to_string(shape_size), std::to_string(shape.size())}); GELOGE(PARAM_INVALID, - "Input parameter[--dynamic_batch_size or 
--dynamic_image_size]'s " + "Input parameter[--dynamic_batch_size, --dynamic_image_size or --dynamic_dims]'s " "value size must be same, first group's size is %zu and another's is %zu.", shape_size, shape.size()); return PARAM_INVALID; @@ -535,7 +539,7 @@ Status MultiBatchGraphCopyer::CheckArguments() { if (shapes_set.size() != shapes_.size()) { ErrorManager::GetInstance().ATCReportErrMessage("E10039"); GELOGE(PARAM_INVALID, - "Input parameter[--dynamic_batch_size or --dynamic_image_size] exist duplicate shapes, please check"); + "Input parameter[--dynamic_batch_size, --dynamic_image_size or --dynamic_dims] exist duplicate shapes."); return PARAM_INVALID; } return SUCCESS; @@ -690,6 +694,10 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &data) { GELOGE(INTERNAL_ERROR, "Failed to add attr value on output %zu tensor", i); return INTERNAL_ERROR; } + if (!AttrUtils::SetListInt(tensor, ATTR_NAME_COMBINED_DYNAMIC_DIMS, shape.GetDims())) { + GELOGE(INTERNAL_ERROR, "Failed to add attr ATTR_NAME_COMBINED_DYNAMIC_DIMS on output %zu tensor", i); + return INTERNAL_ERROR; + } if (switchn_desc->AddOutputDesc("output" + std::to_string(i), tensor) != GRAPH_SUCCESS) { GELOGE(GRAPH_FAILED, "Opdesc AddOutputDesc failed"); return GRAPH_FAILED; @@ -705,6 +713,10 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &data) { GELOGE(INTERNAL_ERROR, "Failed to add switchn attr on data node %s", data->GetName().c_str()); return INTERNAL_ERROR; } + if (StampDynamicTypeForSwitchN(switchn_desc) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to add dynamic type attr on switchn node %s", switchn_desc->GetName().c_str()); + return INTERNAL_ERROR; + } auto switchn = graph_->AddNode(switchn_desc); if (switchn == nullptr) { @@ -714,6 +726,26 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &data) { data_nodes_to_switchn_[data.get()] = switchn; return SUCCESS; } + +Status MultiBatchGraphCopyer::StampDynamicTypeForSwitchN(OpDescPtr &switchn_desc) 
{ + GE_CHECK_NOTNULL(switchn_desc); + int32_t dynamic_type = static_cast(FIXED); + if (!domi::GetContext().dynamic_batch_size.empty()) { + dynamic_type = static_cast(DYNAMIC_BATCH); + } + if (!domi::GetContext().dynamic_image_size.empty()) { + dynamic_type = static_cast(DYNAMIC_IMAGE); + } + if (!domi::GetContext().dynamic_dims.empty()) { + dynamic_type = static_cast(DYNAMIC_DIMS); + } + if (!AttrUtils::SetInt(switchn_desc, ATTR_DYNAMIC_TYPE, dynamic_type)) { + GELOGE(INTERNAL_ERROR, "Failed to add dynamic type attr of switchn node %s", switchn_desc->GetName().c_str()); + return INTERNAL_ERROR; + } + return SUCCESS; +} + Status MultiBatchGraphCopyer::InsertMergeForEdgeNode(const NodePtr &node) { for (auto &in_data_anchor : node->GetAllInDataAnchors()) { auto src_out_anchor = in_data_anchor->GetPeerOutAnchor(); @@ -913,7 +945,6 @@ Status MultiBatchGraphCopyer::LinkToNodeOutBranch(const NodePtr &node) { } Status ProcessMultiBatch(ComputeGraphPtr &graph) { - const int kDecimal = 10; std::vector> shapes; if (!domi::GetContext().dynamic_batch_size.empty()) { GELOGD("Found dynamic batch option, value %s", domi::GetContext().dynamic_batch_size.c_str()); @@ -926,25 +957,25 @@ Status ProcessMultiBatch(ComputeGraphPtr &graph) { GELOGI("Found dynamic batch, shape %s", formats::JoinToString(*shapes.rbegin()).c_str()); } } + if (!domi::GetContext().dynamic_image_size.empty()) { GELOGD("Found dynamic image size option, value %s", domi::GetContext().dynamic_image_size.c_str()); - std::vector shape_strs = ge::StringUtils::Split(domi::GetContext().dynamic_image_size, ';'); - for (const auto &shape_str : shape_strs) { - if (shape_str.empty()) { - continue; - } - std::vector shape; - std::vector dims = ge::StringUtils::Split(shape_str, ','); - for (const auto &dim : dims) { - if (dim.empty()) { - continue; - } - shape.emplace_back(std::strtol(dim.c_str(), nullptr, kDecimal)); - } - shapes.emplace_back(shape); + ParseDynamicSize(domi::GetContext().dynamic_image_size, shapes); + + for 
(const auto &shape : shapes) { GELOGI("Found dynamic image size, shape %s", formats::JoinToString(shape).c_str()); } } + + if (!domi::GetContext().dynamic_dims.empty()) { + GELOGD("Found dynamic dims option, value %s", domi::GetContext().dynamic_dims.c_str()); + ParseDynamicSize(domi::GetContext().dynamic_dims, shapes); + + for (const auto &shape : shapes) { + GELOGI("Found dynamic dims, shape %s", formats::JoinToString(shape).c_str()); + } + } + if (shapes.empty()) { GELOGD("There is no multi-batch options, no need to process multi-batch copy"); return SUCCESS; @@ -958,6 +989,26 @@ Status ProcessMultiBatch(ComputeGraphPtr &graph) { return copyer.CopyGraph(); } +void ParseDynamicSize(string dynamic_size, vector> &shapes) { + std::vector shape_strs = ge::StringUtils::Split(dynamic_size, ';'); + for (const auto &shape_str : shape_strs) { + if (shape_str.empty()) { + continue; + } + std::vector shape; + std::vector dims = ge::StringUtils::Split(shape_str, ','); + for (const auto &dim : dims) { + if (dim.empty()) { + continue; + } + shape.emplace_back(std::strtol(dim.c_str(), nullptr, kDecimal)); + } + if (!shape.empty()) { + shapes.emplace_back(shape); + } + } +} + Status GetDynamicOutputShape(ComputeGraphPtr &graph) { GELOGI("Start to get dynamic output dynamic batch shape msg"); std::vector dynamic_output_dims; diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.h b/src/ge/graph/preprocess/multi_batch_copy_graph.h index bf1d53b3..7e317cb0 100644 --- a/src/ge/graph/preprocess/multi_batch_copy_graph.h +++ b/src/ge/graph/preprocess/multi_batch_copy_graph.h @@ -27,6 +27,8 @@ namespace ge { namespace multibatch { Status ProcessMultiBatch(ComputeGraphPtr &graph); +void ParseDynamicSize(std::string dynamic_size, std::vector> &shapes); + Status GetDynamicOutputShape(ComputeGraphPtr &graph); enum NodeStatus { @@ -54,6 +56,7 @@ class MultiBatchGraphCopyer { NodePtr InsertShapeDataNode(); Status InsertSwitchNForData(const NodePtr &data); + Status 
StampDynamicTypeForSwitchN(OpDescPtr &switchn_desc); Status UpdateMaxShapeToData(const NodePtr &data); Status InsertMergeForEdgeNode(const NodePtr &node); diff --git a/src/ge/host_aicpu_engine/common/constant/constant.h b/src/ge/host_aicpu_engine/common/constant/constant.h new file mode 100644 index 00000000..998dc7eb --- /dev/null +++ b/src/ge/host_aicpu_engine/common/constant/constant.h @@ -0,0 +1,30 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_HOST_AICPU_ENGINE_COMMON_CONSTANT_CONSTANT_H_ +#define GE_HOST_AICPU_ENGINE_COMMON_CONSTANT_CONSTANT_H_ + +#include + +namespace ge { +namespace host_aicpu { +// engine name +const char kHostAiCpuEngineName[] = "DNN_VM_HOST_AICPU"; +const char kHostAiCpuOpKernelLibName[] = "DNN_VM_HOST_AICPU_OP_STORE"; +} // namespace host_aicpu +} // namespace ge + +#endif // GE_HOST_AICPU_ENGINE_COMMON_CONSTANT_CONSTANT_H_ diff --git a/src/ge/host_aicpu_engine/engine/host_aicpu_engine.cc b/src/ge/host_aicpu_engine/engine/host_aicpu_engine.cc new file mode 100644 index 00000000..12ec5ede --- /dev/null +++ b/src/ge/host_aicpu_engine/engine/host_aicpu_engine.cc @@ -0,0 +1,74 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "host_aicpu_engine/engine/host_aicpu_engine.h" +#include +#include +#include +#include "framework/common/debug/ge_log.h" +#include "common/ge/ge_util.h" +#include "host_aicpu_engine/common/constant/constant.h" +#include "host_aicpu_engine/ops_kernel_store/host_aicpu_ops_kernel_info.h" + +namespace ge { +namespace host_aicpu { +HostAiCpuEngine &HostAiCpuEngine::Instance() { + static HostAiCpuEngine instance; + return instance; +} + +Status HostAiCpuEngine::Initialize(const std::map &options) { + if (ops_kernel_store_ == nullptr) { + ops_kernel_store_ = MakeShared(); + if (ops_kernel_store_ == nullptr) { + GELOGE(FAILED, "Make HostAiCpuOpsKernelInfoStore failed."); + return FAILED; + } + } + return SUCCESS; +} + +void HostAiCpuEngine::GetOpsKernelInfoStores(std::map &ops_kernel_map) { + if (ops_kernel_store_ != nullptr) { + // add buildin opsKernel to opsKernelInfoMap + ops_kernel_map[kHostAiCpuOpKernelLibName] = ops_kernel_store_; + } +} + +void HostAiCpuEngine::GetGraphOptimizerObjs(std::map &) { + // no optimizer for host aicpu engine +} + +Status HostAiCpuEngine::Finalize() { + ops_kernel_store_ = nullptr; + return SUCCESS; +} +} // namespace host_aicpu +} // namespace ge + +ge::Status Initialize(const std::map &options) { + return ge::host_aicpu::HostAiCpuEngine::Instance().Initialize(options); +} + +void GetOpsKernelInfoStores(std::map &ops_kernel_map) { + ge::host_aicpu::HostAiCpuEngine::Instance().GetOpsKernelInfoStores(ops_kernel_map); +} + +void GetGraphOptimizerObjs(std::map &graph_optimizers) { + 
ge::host_aicpu::HostAiCpuEngine::Instance().GetGraphOptimizerObjs(graph_optimizers); +} + +ge::Status Finalize() { return ge::host_aicpu::HostAiCpuEngine::Instance().Finalize(); } diff --git a/src/ge/host_aicpu_engine/engine/host_aicpu_engine.h b/src/ge/host_aicpu_engine/engine/host_aicpu_engine.h new file mode 100644 index 00000000..f8ad71b1 --- /dev/null +++ b/src/ge/host_aicpu_engine/engine/host_aicpu_engine.h @@ -0,0 +1,111 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_HOST_AICPU_ENGINE_ENGINE_HOST_AICPU_ENGINE_H_ +#define GE_HOST_AICPU_ENGINE_ENGINE_HOST_AICPU_ENGINE_H_ + +#include +#include +#include +#include "common/opskernel/ops_kernel_info_store.h" +#include "common/optimizer/graph_optimizer.h" + +using OpsKernelInfoStorePtr = std::shared_ptr; +using GraphOptimizerPtr = std::shared_ptr; + +namespace ge { +namespace host_aicpu { +/** + * host aicpu engine. + * Used for the ops which executes on host. + */ +class HostAiCpuEngine { + public: + /** + * get HostAiCpuEngine instance. + * @return HostAiCpuEngine instance. + */ + static HostAiCpuEngine &Instance(); + + virtual ~HostAiCpuEngine() = default; + + /** + * When Ge start, GE will invoke this interface + * @return The status whether initialize successfully + */ + Status Initialize(const std::map &options); + + /** + * After the initialize, GE will invoke this interface + * to get the Ops kernel Store. 
+ * @param ops_kernel_map The host aicpu's ops kernel info + */ + void GetOpsKernelInfoStores(std::map &ops_kernel_map); + + /** + * After the initialize, GE will invoke this interface + * to get the Graph Optimizer. + * @param graph_optimizers The host aicpu's Graph Optimizer objs + */ + void GetGraphOptimizerObjs(std::map &graph_optimizers); + + /** + * When the graph finished, GE will invoke this interface + * @return The status whether initialize successfully + */ + Status Finalize(); + + HostAiCpuEngine(const HostAiCpuEngine &HostAiCpuEngine) = delete; + HostAiCpuEngine(const HostAiCpuEngine &&HostAiCpuEngine) = delete; + HostAiCpuEngine &operator=(const HostAiCpuEngine &HostAiCpuEngine) = delete; + HostAiCpuEngine &operator=(HostAiCpuEngine &&HostAiCpuEngine) = delete; + + private: + HostAiCpuEngine() = default; + + OpsKernelInfoStorePtr ops_kernel_store_ = nullptr; +}; +} // namespace host_aicpu +} // namespace ge + +extern "C" { + +/** + * When Ge start, GE will invoke this interface + * @return The status whether initialize successfully + */ +ge::Status Initialize(const map &options); + +/** + * After the initialize, GE will invoke this interface to get the Ops kernel Store + * @param ops_kernel_map The host aicpu's ops kernel info + */ +void GetOpsKernelInfoStores(std::map &ops_kernel_map); + +/** + * After the initialize, GE will invoke this interface to get the Graph Optimizer + * @param graph_optimizers The host aicpu's Graph Optimizer objs + */ +void GetGraphOptimizerObjs(std::map &graph_optimizers); + +/** + * When the graph finished, GE will invoke this interface + * @return The status whether initialize successfully + */ +ge::Status Finalize(); +} + +#endif // GE_HOST_AICPU_ENGINE_ENGINE_HOST_AICPU_ENGINE_H_ diff --git a/src/ge/host_aicpu_engine/module.mk b/src/ge/host_aicpu_engine/module.mk new file mode 100644 index 00000000..d2fe539e --- /dev/null +++ b/src/ge/host_aicpu_engine/module.mk @@ -0,0 +1,59 @@ +LOCAL_PATH := $(call my-dir) + + 
+local_lib_src_files := engine/host_aicpu_engine.cc \ + ops_kernel_store/host_aicpu_ops_kernel_info.cc \ + ops_kernel_store/op/op_factory.cc \ + ops_kernel_store/op/variable_op.cc \ + ops_kernel_store/op/assign_op.cc \ + ops_kernel_store/op/random_uniform_op.cc \ + +local_lib_inc_path := proto/task.proto \ + ${LOCAL_PATH} \ + ${TOPDIR}inc \ + ${TOPDIR}inc/external \ + ${TOPDIR}inc/external/graph \ + $(TOPDIR)libc_sec/include \ + ${TOPDIR}third_party/protobuf/include \ + ${TOPDIR}inc/framework \ + $(TOPDIR)framework/domi \ + +#compiler for host +include $(CLEAR_VARS) +LOCAL_MODULE := libhost_aicpu_engine +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -std=c++11 +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := +LOCAL_SHARED_LIBRARIES := libprotobuf \ + libc_sec \ + libslog \ + libgraph \ + libregister \ + libruntime + +LOCAL_SRC_FILES := $(local_lib_src_files) +LOCAL_C_INCLUDES := $(local_lib_inc_path) + +include ${BUILD_HOST_SHARED_LIBRARY} + +#compiler for atc +include $(CLEAR_VARS) +LOCAL_MODULE := atclib/libhost_aicpu_engine +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -std=c++11 +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := +LOCAL_SHARED_LIBRARIES := libprotobuf \ + libc_sec \ + libslog \ + libgraph \ + libregister \ + libruntime_compile + +LOCAL_SRC_FILES := $(local_lib_src_files) +LOCAL_C_INCLUDES := $(local_lib_inc_path) + +include ${BUILD_HOST_SHARED_LIBRARY} diff --git a/src/ge/host_aicpu_engine/ops_kernel_store/host_aicpu_ops_kernel_info.cc b/src/ge/host_aicpu_engine/ops_kernel_store/host_aicpu_ops_kernel_info.cc new file mode 100644 index 00000000..4dbedab1 --- /dev/null +++ b/src/ge/host_aicpu_engine/ops_kernel_store/host_aicpu_ops_kernel_info.cc @@ -0,0 +1,132 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "host_aicpu_engine/ops_kernel_store/host_aicpu_ops_kernel_info.h" +#include +#include "common/constant/constant.h" +#include "ge/ge_api_types.h" +#include "common/ge/ge_util.h" +#include "common/ge_inner_error_codes.h" +#include "framework/common/debug/ge_log.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/type_utils.h" +#include "op/op_factory.h" +#include "proto/task.pb.h" + +namespace ge { +namespace host_aicpu { +using domi::TaskDef; +using std::map; +using std::string; +using std::vector; + +Status HostAiCpuOpsKernelInfoStore::Initialize(const map &options) { + GELOGI("HostAiCpuOpsKernelInfoStore init start."); + OpInfo default_op_info = {.engine = kHostAiCpuEngineName, + .opKernelLib = kHostAiCpuOpKernelLibName, + .computeCost = 0, + .flagPartial = false, + .flagAsync = false, + .isAtomic = false}; + // Init op_info_map_ + auto all_ops = OpFactory::Instance().GetAllOps(); + for (auto &op : all_ops) { + op_info_map_[op] = default_op_info; + } + + GELOGI("HostAiCpuOpsKernelInfoStore inited success. 
op num=%zu", op_info_map_.size()); + + return SUCCESS; +} + +Status HostAiCpuOpsKernelInfoStore::Finalize() { + op_info_map_.clear(); + return SUCCESS; +} + +Status HostAiCpuOpsKernelInfoStore::CalcOpRunningParam(Node &ge_node) { + OpDescPtr op_desc = ge_node.GetOpDesc(); + if (op_desc == nullptr) { + GELOGE(FAILED, "CalcOpRunningParam failed, as op desc is null"); + return FAILED; + } + + bool is_shape_unknown = false; + if (NodeUtils::GetNodeUnknownShapeStatus(ge_node, is_shape_unknown) == GRAPH_SUCCESS) { + if (is_shape_unknown) { + GELOGI("op:%s is unknown shape, does not need to calc output size.", ge_node.GetName().c_str()); + return SUCCESS; + } + } + + const string name = ge_node.GetName(); + const string type = ge_node.GetType(); + GELOGD("Calc op[%s:%s] running param, output size=%zu.", name.c_str(), type.c_str(), op_desc->GetOutputsSize()); + + for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) { + GeTensorDesc output_tensor = op_desc->GetOutputDesc(static_cast(i)); + Format format = output_tensor.GetFormat(); + DataType data_type = output_tensor.GetDataType(); + + int64_t mem_size = 0; + // If mem size has been set, no need reset. 
+ if ((TensorUtils::GetSize(output_tensor, mem_size) == GRAPH_SUCCESS) && (mem_size > 0)) { + GELOGD("Op[%s:%s] out[%zu] mem size has been set, no need calc again, format=%s, data_type=%s, mem_size=%ld.", + name.c_str(), type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str(), mem_size); + continue; + } + + int64_t output_mem_size = 0; + GeShape output_shape = output_tensor.GetShape(); + if ((TensorUtils::CalcTensorMemSize(output_shape, format, data_type, output_mem_size) != GRAPH_SUCCESS) || + (output_mem_size < 0)) { + GELOGE(FAILED, "Calc op[%s:%s] out[%zu] mem size failed, mem_size=%ld, format=%s, data_type=%s.", name.c_str(), + type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); + return FAILED; + } + GELOGI("Calc op[%s:%s] out[%zu] mem size is %ld, format=%s, data_type=%s.", name.c_str(), type.c_str(), i, + output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); + + TensorUtils::SetSize(output_tensor, output_mem_size); + if (op_desc->UpdateOutputDesc(static_cast(i), output_tensor) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Update op[%s:%s] out[%zu] desc failed, format=%s, data_type=%s.", name.c_str(), type.c_str(), i, + TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); + return FAILED; + } + } + GELOGD("Calc op[%s:%s] running param success.", name.c_str(), type.c_str()); + return SUCCESS; +} + +void HostAiCpuOpsKernelInfoStore::GetAllOpsKernelInfo(map &infos) const { infos = op_info_map_; } + +Status HostAiCpuOpsKernelInfoStore::GenerateTask(const Node &node, RunContext &context, vector &tasks) { + // no need to generate device task + return SUCCESS; +} + +bool HostAiCpuOpsKernelInfoStore::CheckSupported(const OpDescPtr &op_desc, std::string &) const { + if (op_desc == nullptr) { + return false; 
+ } + return op_info_map_.count(op_desc->GetType()) > 0; +} +} // namespace host_aicpu +} // namespace ge diff --git a/src/ge/host_aicpu_engine/ops_kernel_store/host_aicpu_ops_kernel_info.h b/src/ge/host_aicpu_engine/ops_kernel_store/host_aicpu_ops_kernel_info.h new file mode 100644 index 00000000..a4051b9b --- /dev/null +++ b/src/ge/host_aicpu_engine/ops_kernel_store/host_aicpu_ops_kernel_info.h @@ -0,0 +1,88 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_HOST_AICPU_OPS_KERNEL_INFO_H_ +#define GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_HOST_AICPU_OPS_KERNEL_INFO_H_ + +#include +#include +#include + +#include "common/opskernel/ops_kernel_info_store.h" + +namespace ge { +namespace host_aicpu { +class HostAiCpuOpsKernelInfoStore : public OpsKernelInfoStore { + public: + HostAiCpuOpsKernelInfoStore() {} + ~HostAiCpuOpsKernelInfoStore() override = default; + + /** + * Initialize related resources of the host aicpu kernelinfo store + * @return status whether this operation success + */ + Status Initialize(const std::map &options) override; + + /** + * Release related resources of the host aicpu kernel info store + * @return status whether this operation success + */ + Status Finalize() override; + + /** + * Check to see if an operator is fully supported or partially supported. 
+ * @param op_desc OpDesc information + * @param reason unsupported reason + * @return bool value indicate whether the operator is fully supported + */ + bool CheckSupported(const OpDescPtr &op_desc, std::string &reason) const override; + + /** + * Returns the full operator information. + * @param infos reference of a map, + * contain operator's name and detailed information + */ + void GetAllOpsKernelInfo(std::map &infos) const override; + + /** + * Calc the running size of Operator, + * then GE will alloc the mem size from runtime + * @param ge_node Node information + * @return status whether this operation success + */ + Status CalcOpRunningParam(ge::Node &ge_node) override; + + /** + * call the runtime's interface to generate the task + * @param node Node information + * @param context run context info + * @return status whether this operation success + */ + Status GenerateTask(const ge::Node &ge_node, ge::RunContext &context, std::vector &tasks) override; + + HostAiCpuOpsKernelInfoStore(const HostAiCpuOpsKernelInfoStore &ops_kernel_store) = delete; + HostAiCpuOpsKernelInfoStore(const HostAiCpuOpsKernelInfoStore &&ops_kernel_store) = delete; + HostAiCpuOpsKernelInfoStore &operator=(const HostAiCpuOpsKernelInfoStore &ops_kernel_store) = delete; + HostAiCpuOpsKernelInfoStore &operator=(HostAiCpuOpsKernelInfoStore &&ops_kernel_store) = delete; + + private: + // store op name and OpInfo key-value pair + std::map op_info_map_; +}; +} // namespace host_aicpu +} // namespace ge + +#endif // GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_HOST_AICPU_OPS_KERNEL_INFO_H_ diff --git a/src/ge/host_aicpu_engine/ops_kernel_store/op/assign_op.cc b/src/ge/host_aicpu_engine/ops_kernel_store/op/assign_op.cc new file mode 100644 index 00000000..32f8ec24 --- /dev/null +++ b/src/ge/host_aicpu_engine/ops_kernel_store/op/assign_op.cc @@ -0,0 +1,51 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not 
use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "host_aicpu_engine/ops_kernel_store/op/assign_op.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/util.h" +#include "host_aicpu_engine/ops_kernel_store/op/op_factory.h" + +namespace { +const size_t kAssignInputNum = 2; +const size_t kAssignRefInputIndex = 0; +const size_t kAssignValueInputIndex = 1; +const size_t kAssignRefOutputIndex = 0; +} // namespace + +namespace ge { +namespace host_aicpu { +Status AssignOp::Compute(const ge::OpDescPtr &op_desc_ptr, const std::vector &inputs, + std::vector &outputs) { + GELOGI("AssignOp [%s, %s] compute begin.", node_.GetName().c_str(), node_.GetType().c_str()); + if (inputs.size() != kAssignInputNum) { + GELOGE(PARAM_INVALID, "Number of input for AssignOp must be %zu.", kAssignInputNum); + return PARAM_INVALID; + } + auto &ref_input = inputs[kAssignRefInputIndex]; + const auto &value_input = inputs[kAssignValueInputIndex]; + ref_input->SetData(value_input->GetData().GetData(), value_input->GetData().GetSize()); + GeTensorPtr output_ptr = MakeShared(op_desc_ptr->GetOutputDesc(kAssignRefOutputIndex), + value_input->GetData().GetData(), value_input->GetData().GetSize()); + GE_CHECK_NOTNULL(output_ptr); + outputs.push_back(output_ptr); + GELOGI("AssignOp [%s, %s] compute success.", node_.GetName().c_str(), node_.GetType().c_str()); + return SUCCESS; +} + +REGISTER_OP_CREATOR(Assign, AssignOp); +} // namespace host_aicpu +} // namespace ge diff --git a/src/ge/host_aicpu_engine/ops_kernel_store/op/assign_op.h 
b/src/ge/host_aicpu_engine/ops_kernel_store/op/assign_op.h new file mode 100644 index 00000000..caf9d4c9 --- /dev/null +++ b/src/ge/host_aicpu_engine/ops_kernel_store/op/assign_op.h @@ -0,0 +1,41 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_OP_ASSIGN_OP_H_ +#define GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_OP_ASSIGN_OP_H_ + +#include "host_aicpu_engine/ops_kernel_store/op/op.h" + +namespace ge { +namespace host_aicpu { +class AssignOp : public Op { + public: + AssignOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} + ~AssignOp() override = default; + AssignOp &operator=(const AssignOp &op) = delete; + AssignOp(const AssignOp &op) = delete; + + /** + * @brief compute for node_task. 
+ * @return result + */ + Status Compute(const ge::OpDescPtr &op_desc_ptr, const std::vector &inputs, + std::vector &outputs) override; +}; +} // namespace host_aicpu +} // namespace ge + +#endif // GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_OP_ASSIGN_OP_H_ diff --git a/src/ge/host_aicpu_engine/ops_kernel_store/op/op.h b/src/ge/host_aicpu_engine/ops_kernel_store/op/op.h new file mode 100644 index 00000000..a9e4550b --- /dev/null +++ b/src/ge/host_aicpu_engine/ops_kernel_store/op/op.h @@ -0,0 +1,48 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_OP_OP_H_ +#define GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_OP_OP_H_ + +#include +#include +#include +#include "common/ge_inner_error_codes.h" +#include "common/opskernel/ops_kernel_info_types.h" +#include "graph/node.h" + +namespace ge { +namespace host_aicpu { +/** + * The base class for all op. 
+ */ +class Op { + public: + Op(const Node &node, RunContext &run_context) : run_context_(run_context), node_(node) {} + virtual ~Op() = default; + + Status Run() { return SUCCESS; } + virtual Status Compute(const ge::OpDescPtr &op_desc_ptr, const std::vector &inputs, + std::vector &outputs) = 0; + + protected: + const RunContext &run_context_; + const Node &node_; +}; +} // namespace host_aicpu +} // namespace ge + +#endif // GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_OP_OP_H_ diff --git a/src/ge/host_aicpu_engine/ops_kernel_store/op/op_factory.cc b/src/ge/host_aicpu_engine/ops_kernel_store/op/op_factory.cc new file mode 100644 index 00000000..ec376d8a --- /dev/null +++ b/src/ge/host_aicpu_engine/ops_kernel_store/op/op_factory.cc @@ -0,0 +1,55 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "host_aicpu_engine/ops_kernel_store/op/op_factory.h" +#include "framework/common/debug/ge_log.h" +#include "common/ge_inner_error_codes.h" +#include "graph/op_desc.h" + +namespace ge { +namespace host_aicpu { +OpFactory &OpFactory::Instance() { + static OpFactory instance; + return instance; +} + +std::shared_ptr OpFactory::CreateOp(const Node &node, RunContext &run_context) { + auto iter = op_creator_map_.find(node.GetType()); + if (iter != op_creator_map_.end()) { + return iter->second(node, run_context); + } + + GELOGE(FAILED, "Not supported OP, type = %s, name = %s", node.GetType().c_str(), node.GetName().c_str()); + return nullptr; +} + +void OpFactory::RegisterCreator(const std::string &type, const OP_CREATOR_FUNC &func) { + if (func == nullptr) { + GELOGW("Func is NULL."); + return; + } + + auto iter = op_creator_map_.find(type); + if (iter != op_creator_map_.end()) { + GELOGW("%s creator already exist", type.c_str()); + return; + } + + op_creator_map_[type] = func; + all_ops_.emplace_back(type); +} +} // namespace host_aicpu +} // namespace ge diff --git a/src/ge/host_aicpu_engine/ops_kernel_store/op/op_factory.h b/src/ge/host_aicpu_engine/ops_kernel_store/op/op_factory.h new file mode 100644 index 00000000..007bceaa --- /dev/null +++ b/src/ge/host_aicpu_engine/ops_kernel_store/op/op_factory.h @@ -0,0 +1,94 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_OP_OP_FACTORY_H_ +#define GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_OP_OP_FACTORY_H_ + +#include +#include +#include +#include +#include +#include "common/ge/ge_util.h" +#include "host_aicpu_engine/ops_kernel_store/op/op.h" + +namespace ge { +namespace host_aicpu { +using OP_CREATOR_FUNC = std::function(const Node &, RunContext &)>; + +/** + * manage all the op, support create op. + */ +class OpFactory { + public: + static OpFactory &Instance(); + + /** + * @brief create Op. + * @param [in] node share ptr of node + * @param [in] run_context run context + * @return not nullptr success + * @return nullptr fail + */ + std::shared_ptr CreateOp(const Node &node, RunContext &run_context); + + /** + * @brief Register Op create function. + * @param [in] type Op type + * @param [in] func Op create func + */ + void RegisterCreator(const std::string &type, const OP_CREATOR_FUNC &func); + + const std::vector &GetAllOps() const { return all_ops_; } + + bool CheckSupported(const std::string &type) { return op_creator_map_.find(type) != op_creator_map_.end(); } + + OpFactory(const OpFactory &) = delete; + OpFactory &operator=(const OpFactory &) = delete; + OpFactory(OpFactory &&) = delete; + OpFactory &operator=(OpFactory &&) = delete; + + private: + OpFactory() = default; + ~OpFactory() = default; + + // the op creator function map + std::map op_creator_map_; + std::vector all_ops_; +}; + +class OpRegistrar { + public: + OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) { + OpFactory::Instance().RegisterCreator(type, func); + } + ~OpRegistrar() = default; + + OpRegistrar(const OpRegistrar &) = delete; + OpRegistrar &operator=(const OpRegistrar &) = delete; + OpRegistrar(OpRegistrar &&) = delete; + OpRegistrar &operator=(OpRegistrar &&) = delete; +}; + +#define REGISTER_OP_CREATOR(type, clazz) \ + std::shared_ptr Creator_##type##Op(const Node &node, RunContext &run_context) { \ + return MakeShared(node, 
run_context); \ + } \ + OpRegistrar g_##type##Op_creator(#type, Creator_##type##Op) +} // namespace host_aicpu +} // namespace ge + +#endif // GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_OP_OP_FACTORY_H_ diff --git a/src/ge/host_aicpu_engine/ops_kernel_store/op/random_uniform_op.cc b/src/ge/host_aicpu_engine/ops_kernel_store/op/random_uniform_op.cc new file mode 100644 index 00000000..81768f7a --- /dev/null +++ b/src/ge/host_aicpu_engine/ops_kernel_store/op/random_uniform_op.cc @@ -0,0 +1,104 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "host_aicpu_engine/ops_kernel_store/op/random_uniform_op.h" +#include +#include "framework/common/debug/ge_log.h" +#include "framework/common/util.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/type_utils.h" +#include "host_aicpu_engine/ops_kernel_store/op/op_factory.h" + +namespace ge { +namespace host_aicpu { +Status RandomUniformOp::Compute(const ge::OpDescPtr &op_desc_ptr, const std::vector &inputs, + std::vector &outputs) { + GELOGI("RandomUniformOp [%s, %s] compute begin.", node_.GetName().c_str(), node_.GetType().c_str()); + int64_t seed = 0; + int64_t seed2 = 0; + (void)AttrUtils::GetInt(op_desc_ptr, "seed", seed); + (void)AttrUtils::GetInt(op_desc_ptr, "seed2", seed2); + DataType data_type = DT_UNDEFINED; + if (AttrUtils::GetDataType(op_desc_ptr, VAR_ATTR_DTYPE, data_type) != GRAPH_SUCCESS) { + GELOGE(PARAM_INVALID, "get attr VAR_ATTR_DTYPE failed"); + return PARAM_INVALID; + } + + switch (data_type) { + case DT_FLOAT16: + break; + case DT_FLOAT: + if (Generate(op_desc_ptr, seed, seed2, outputs) != SUCCESS) { + GELOGE(FAILED, "Generate random_distribution for RandomUniformOp failed, data_type=DT_FLOAT"); + return FAILED; + } + break; + case DT_DOUBLE: + if (Generate(op_desc_ptr, seed, seed2, outputs) != SUCCESS) { + GELOGE(FAILED, "Generate random_distribution for RandomUniformOp failed, data_type=DT_DOUBLE"); + return FAILED; + } + break; + default: + GELOGE(UNSUPPORTED, "Supported DataType for RandomUniformOp is DT_FLOAT16 / DT_FLOAT / DT_DOUBLE, but dtype=%s", + TypeUtils::DataTypeToSerialString(data_type).c_str()); + return UNSUPPORTED; + } + + GELOGI("RandomUniformOp [%s, %s] compute success.", node_.GetName().c_str(), node_.GetType().c_str()); + return SUCCESS; +} + +template +Status RandomUniformOp::Generate(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, + std::vector &outputs) { + GE_CHECK_NOTNULL(op_desc_ptr); + // RandomUniformOp has and only has one output + int64_t data_num = 
op_desc_ptr->GetOutputDesc(0).GetShape().GetShapeSize(); + std::unique_ptr buf(new (std::nothrow) T[data_num]()); + if (buf == nullptr) { + GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", static_cast(sizeof(T) * data_num)); + return MEMALLOC_FAILED; + } + + int64_t final_seed; + if (seed == 0) { + if (seed2 == 0) { + std::random_device rd; + final_seed = rd(); + } else { + final_seed = seed2; + } + } else { + final_seed = seed; + } + std::mt19937_64 gen(final_seed); + std::uniform_real_distribution distribution(0, 1); + for (int64_t i = 0; i < data_num; i++) { + *(buf.get() + i) = distribution(gen); + } + + GeTensorPtr output = + MakeShared(op_desc_ptr->GetOutputDesc(0), reinterpret_cast(buf.get()), data_num * sizeof(T)); + GE_CHECK_NOTNULL(output); + outputs.emplace_back(output); + + return SUCCESS; +} + +REGISTER_OP_CREATOR(RandomUniform, RandomUniformOp); +} // namespace host_aicpu +} // namespace ge diff --git a/src/ge/host_aicpu_engine/ops_kernel_store/op/random_uniform_op.h b/src/ge/host_aicpu_engine/ops_kernel_store/op/random_uniform_op.h new file mode 100644 index 00000000..dfb2485f --- /dev/null +++ b/src/ge/host_aicpu_engine/ops_kernel_store/op/random_uniform_op.h @@ -0,0 +1,45 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_OP_RANDOM_UNIFORM_OP_H_ +#define GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_OP_RANDOM_UNIFORM_OP_H_ + +#include "host_aicpu_engine/ops_kernel_store/op/op.h" + +namespace ge { +namespace host_aicpu { +class RandomUniformOp : public Op { + public: + RandomUniformOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} + ~RandomUniformOp() override = default; + RandomUniformOp &operator=(const RandomUniformOp &op) = delete; + RandomUniformOp(const RandomUniformOp &op) = delete; + + /** + * @brief compute for node_task. + * @return result + */ + Status Compute(const ge::OpDescPtr &op_desc_ptr, const std::vector &inputs, + std::vector &outputs) override; + + private: + template + Status Generate(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, std::vector &outputs); +}; +} // namespace host_aicpu +} // namespace ge + +#endif // GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_OP_RANDOM_UNIFORM_OP_H_ diff --git a/src/ge/host_aicpu_engine/ops_kernel_store/op/variable_op.cc b/src/ge/host_aicpu_engine/ops_kernel_store/op/variable_op.cc new file mode 100644 index 00000000..effa346b --- /dev/null +++ b/src/ge/host_aicpu_engine/ops_kernel_store/op/variable_op.cc @@ -0,0 +1,46 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "host_aicpu_engine/ops_kernel_store/op/variable_op.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/util.h" +#include "host_aicpu_engine/ops_kernel_store/op/op_factory.h" + +namespace { +const size_t kInputSize = 1; +} + +namespace ge { +namespace host_aicpu { +Status VariableOp::Compute(const ge::OpDescPtr &op_desc_ptr, const std::vector &inputs, + std::vector &outputs) { + GELOGI("VariableOp [%s, %s] compute begin.", node_.GetName().c_str(), node_.GetType().c_str()); + if (inputs.size() != kInputSize) { + GELOGE(PARAM_INVALID, "Number of input for VariableOp must be %zu.", kInputSize); + return PARAM_INVALID; + } + GeTensorPtr output_ptr = + MakeShared(op_desc_ptr->GetOutputDesc(0), inputs[0]->GetData().GetData(), inputs[0]->GetData().GetSize()); + GE_CHECK_NOTNULL(output_ptr); + outputs.push_back(output_ptr); + GELOGI("VariableOp [%s, %s] compute success.", node_.GetName().c_str(), node_.GetType().c_str()); + return SUCCESS; +} + +REGISTER_OP_CREATOR(Variable, VariableOp); +REGISTER_OP_CREATOR(Constant, VariableOp); +} // namespace host_aicpu +} // namespace ge diff --git a/src/ge/host_aicpu_engine/ops_kernel_store/op/variable_op.h b/src/ge/host_aicpu_engine/ops_kernel_store/op/variable_op.h new file mode 100644 index 00000000..b6570557 --- /dev/null +++ b/src/ge/host_aicpu_engine/ops_kernel_store/op/variable_op.h @@ -0,0 +1,41 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_OP_VARIABLE_OP_H_ +#define GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_OP_VARIABLE_OP_H_ + +#include "host_aicpu_engine/ops_kernel_store/op/op.h" + +namespace ge { +namespace host_aicpu { +class VariableOp : public Op { + public: + VariableOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} + ~VariableOp() override = default; + VariableOp &operator=(const VariableOp &op) = delete; + VariableOp(const VariableOp &op) = delete; + + /** + * @brief compute for node_task. + * @return result + */ + Status Compute(const ge::OpDescPtr &op_desc_ptr, const std::vector &inputs, + std::vector &outputs) override; +}; +} // namespace host_aicpu +} // namespace ge + +#endif // GE_HOST_AICPU_ENGINE_OPS_KERNEL_STORE_OP_VARIABLE_OP_H_ diff --git a/src/ge/host_kernels/rank_kernel.cc b/src/ge/host_kernels/rank_kernel.cc index c8763aef..7fb92039 100644 --- a/src/ge/host_kernels/rank_kernel.cc +++ b/src/ge/host_kernels/rank_kernel.cc @@ -19,6 +19,7 @@ #include #include +#include "graph/types.h" #include "common/ge_inner_error_codes.h" #include "common/op/ge_op_utils.h" #include "framework/common/debug/ge_log.h" @@ -46,6 +47,9 @@ Status RankKernel::Compute(const NodePtr &node, std::vector &v_outp const auto &input_shape = op_desc->MutableInputDesc(kRankDataInputIndex); GE_CHECK_NOTNULL(input_shape); + if (input_shape->GetShape().GetDims() == UNKNOWN_RANK) { + return NOT_CHANGED; + } auto ndims = input_shape->GetShape().GetDimNum(); GeTensorDesc tensor_desc(op_desc->GetOutputDesc(0)); GeTensorPtr output_ptr; diff --git a/src/ge/hybrid/executor/node_state.cc b/src/ge/hybrid/executor/node_state.cc index 5368597d..c78dd725 100644 --- a/src/ge/hybrid/executor/node_state.cc +++ b/src/ge/hybrid/executor/node_state.cc @@ -33,7 +33,7 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item( } void 
ShapeInferenceState::UpdateInputShape(uint32_t idx, const GeShape &ori_shape, const GeShape &shape) { - if (node_item.is_input_shape_static[idx]) { + if (!node_item.is_dynamic || node_item.is_input_shape_static[idx]) { GELOGD("[%s] Trying to update static shape, idx = %u. old shape = [%s], new shape = [%s]", node_item.NodeName().c_str(), idx, node_item.op_desc->MutableInputDesc(idx)->GetShape().ToString().c_str(), shape.ToString().c_str()); @@ -52,7 +52,7 @@ void ShapeInferenceState::UpdateInputShape(uint32_t idx, const GeShape &ori_shap } void ShapeInferenceState::UpdateInputShapeFuture(uint32_t idx, ShapeFuture &&future) { - if (node_item.is_input_shape_static[idx]) { + if (!node_item.is_dynamic || node_item.is_input_shape_static[idx]) { GELOGD("[%s] Trying to update constant shape, idx = %u", node_item.NodeName().c_str(), idx); return; } @@ -66,6 +66,9 @@ void ShapeInferenceState::UpdateInputShapeFuture(uint32_t idx, ShapeFuture &&fut } Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &context) { + if (!node_item.is_dynamic) { + return SUCCESS; + } std::unique_lock lk(mu_); if (num_pending_shapes_ > 0) { GELOGD("[%s] Await pending shape or shape future start.", node_item.NodeName().c_str()); @@ -144,4 +147,4 @@ Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) { return SUCCESS; } } // namespace hybrid -} // namespace ge \ No newline at end of file +} // namespace ge diff --git a/src/ge/hybrid/executor/subgraph_executor.cc b/src/ge/hybrid/executor/subgraph_executor.cc index 3d699970..7664e90d 100644 --- a/src/ge/hybrid/executor/subgraph_executor.cc +++ b/src/ge/hybrid/executor/subgraph_executor.cc @@ -349,7 +349,7 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) { GELOGD("[%s] Updating output tensor, index = %d, tensor = %s", graph_item_->GetName().c_str(), parent_output_index, outputs[i].DebugString().c_str()); - task_context.SetOutput(parent_output_index, outputs[i]); + 
GE_CHK_STATUS_RET(task_context.SetOutput(parent_output_index, outputs[i])); // updating shapes. dynamic format/dtype is not supported. // It should be noted that even the subgraph is of known shape, it is also necessary to update parent output desc, @@ -370,4 +370,4 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) { return SUCCESS; } } // namespace hybrid -} // namespace ge \ No newline at end of file +} // namespace ge diff --git a/src/ge/hybrid/executor/worker/shape_inference_engine.cc b/src/ge/hybrid/executor/worker/shape_inference_engine.cc index f600e94a..650bcc54 100644 --- a/src/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/src/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -29,6 +29,9 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { GE_CHK_STATUS_RET_NOLOG(node_state.GetShapeInferenceState().AwaitShapesReady(*execution_context_)); auto &node_item = *node_state.GetNodeItem(); + if (node_item.is_output_shape_static) { + return SUCCESS; + } // Skip shape inference for node of type DEPEND_COMPUTE if (node_item.shape_inference_type == DEPEND_COMPUTE) { GELOGD("[%s] Skipping node with unknown shape type DEPEND_COMPUTE", node_item.NodeName().c_str()); @@ -48,10 +51,12 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { // Do shape inference GELOGD("[%s] Start to invoke InferShapeAndType", node_item.NodeName().c_str()); - RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); - GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndType(node_item.node), "Invoke InferShapeAndType failed."); - RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End"); - + { + std::lock_guard lk(mu_); + RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); + GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndType(node_item.node), "Invoke InferShapeAndType 
failed."); + RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End"); + } // Check again to make sure shape is valid after shape inference if (node_item.shape_inference_type != DEPEND_SHAPE_RANGE) { bool is_unknown_shape = false; @@ -89,6 +94,10 @@ Status ShapeInferenceEngine::AwaitDependentNodes(NodeState &node_state) { } Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { + if (node_item.is_output_shape_static) { + return SUCCESS; + } + // output shape will not be valid until compute is done. bool shape_is_future = node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE; diff --git a/src/ge/hybrid/executor/worker/shape_inference_engine.h b/src/ge/hybrid/executor/worker/shape_inference_engine.h index 972f8ee1..65878818 100644 --- a/src/ge/hybrid/executor/worker/shape_inference_engine.h +++ b/src/ge/hybrid/executor/worker/shape_inference_engine.h @@ -19,6 +19,7 @@ #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/executor/subgraph_context.h" +#include namespace ge { namespace hybrid { @@ -36,6 +37,7 @@ class ShapeInferenceEngine { GraphExecutionContext *execution_context_; SubgraphContext *subgraph_context_; + std::mutex mu_; }; } // namespace hybrid } // namespace ge diff --git a/src/ge/hybrid/model/graph_item.cc b/src/ge/hybrid/model/graph_item.cc index 528fc4ee..96250891 100644 --- a/src/ge/hybrid/model/graph_item.cc +++ b/src/ge/hybrid/model/graph_item.cc @@ -30,13 +30,10 @@ const vector &GraphItem::GetInputNodes() const { return input_ Status GraphItem::GetOutputDescList(vector &output_desc_list) const { if (is_dynamic_) { - for (auto &node_and_idx : output_edges_) { - const auto &tensor_desc = node_and_idx.first->op_desc->MutableOutputDesc(node_and_idx.second); - GE_CHECK_NOTNULL(tensor_desc); + for (auto &tensor_desc : output_node_->op_desc->GetAllInputsDescPtr()) { output_desc_list.emplace_back(tensor_desc); } } 
else { - auto all_output_desc = output_node_->op_desc->GetAllOutputsDescPtr(); for (auto &tensor_desc : output_node_->op_desc->GetAllOutputsDescPtr()) { output_desc_list.emplace_back(tensor_desc); } diff --git a/src/ge/hybrid/model/hybrid_model_builder.cc b/src/ge/hybrid/model/hybrid_model_builder.cc index 841f1f15..52063f93 100644 --- a/src/ge/hybrid/model/hybrid_model_builder.cc +++ b/src/ge/hybrid/model/hybrid_model_builder.cc @@ -159,13 +159,6 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n (void)AttrUtils::SetBool(new_node->op_desc, kIsFirstNode, false); (void)AttrUtils::SetBool(new_node->op_desc, kIsLastNode, false); - int32_t unknown_shape_type_val = 0; - (void)AttrUtils::GetInt(new_node->op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); - new_node->shape_inference_type = static_cast(unknown_shape_type_val); - - GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, new_node->is_dynamic), - "[%s] Failed to get shape status.", node->GetName().c_str()); - if (new_node->is_dynamic && (new_node->IsControlOp() || new_node->NodeType() == PARTITIONEDCALL)) { new_node->shape_inference_type = DEPEND_COMPUTE; } @@ -545,6 +538,15 @@ Status HybridModelBuilder::BuildOutputMapping(GraphItem &graph_item, const NodeI Status HybridModelBuilder::LoadGraph() { auto root_graph = ge_root_model_->GetRootGraph(); + std::shared_ptr merged_graph; + GELOGI("Before merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", root_graph->GetDirectNodesSize(), + root_graph->GetAllNodesSize()); + GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraphs(*root_graph, merged_graph), "Failed to unfold subgraphs."); + root_graph = std::move(merged_graph); + GELOGI("After merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", root_graph->GetDirectNodesSize(), + root_graph->GetAllNodesSize()); + GE_DUMP(root_graph, "hybrid_merged_graph"); + GE_CHK_STATUS_RET(LoadDynamicSubgraph(*root_graph, true), "Failed to load root graph."); 
GELOGD("Done loading root graph successfully."); diff --git a/src/ge/hybrid/model/node_item.cc b/src/ge/hybrid/model/node_item.cc index e1cd7f64..bfc29c84 100644 --- a/src/ge/hybrid/model/node_item.cc +++ b/src/ge/hybrid/model/node_item.cc @@ -17,6 +17,8 @@ #include "node_item.h" #include #include "common/debug/log.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/node_utils.h" #include "hybrid/node_executor/node_executor.h" namespace ge { @@ -31,16 +33,34 @@ NodeItem::NodeItem(NodePtr node) : node(std::move(node)) { } Status NodeItem::Init() { - for (int i = 0; i < num_inputs; ++i) { - const auto &input_desc = op_desc->MutableInputDesc(i); - GE_CHECK_NOTNULL(input_desc); - if (input_desc->MutableShape().IsUnknownShape()) { - is_input_shape_static.push_back(false); - } else { - num_static_input_shapes++; - is_input_shape_static.push_back(true); - GELOGD("[%s] The shape of input[%d] is static. shape = [%s]", NodeName().c_str(), i, - input_desc->MutableShape().ToString().c_str()); + int32_t unknown_shape_type_val = 0; + (void)AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); + shape_inference_type = static_cast(unknown_shape_type_val); + + GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic), "[%s] Failed to get shape status.", + node->GetName().c_str()); + + if (is_dynamic) { + for (int i = 0; i < num_inputs; ++i) { + const auto &input_desc = op_desc->MutableInputDesc(i); + GE_CHECK_NOTNULL(input_desc); + if (input_desc->MutableShape().IsUnknownShape()) { + is_input_shape_static.push_back(false); + } else { + num_static_input_shapes++; + is_input_shape_static.push_back(true); + GELOGD("[%s] The shape of input[%d] is static. 
shape = [%s]", NodeName().c_str(), i, + input_desc->MutableShape().ToString().c_str()); + } + } + + for (int i = 0; i < num_outputs; ++i) { + const auto &output_desc = op_desc->MutableOutputDesc(i); + GE_CHECK_NOTNULL(output_desc); + if (output_desc->MutableShape().IsUnknownShape()) { + is_output_shape_static = false; + break; + } } } @@ -59,6 +79,7 @@ std::string NodeItem::DebugString() const { ss << ", name = [" << node->GetName(); ss << "], type = " << node->GetType(); ss << ", is_dynamic = " << (is_dynamic ? "True" : "False"); + ss << ", is_output_static = " << (is_output_shape_static ? "True" : "False"); ss << ", unknown_shape_op_type = " << shape_inference_type; ss << ", input_start = " << input_start; ss << ", num_inputs = " << num_inputs; @@ -91,6 +112,5 @@ void NodeItem::SetToDynamic() { kernel_task = nullptr; } } - } // namespace hybrid -} // namespace ge \ No newline at end of file +} // namespace ge diff --git a/src/ge/hybrid/model/node_item.h b/src/ge/hybrid/model/node_item.h index 4e6d770b..ff024b36 100644 --- a/src/ge/hybrid/model/node_item.h +++ b/src/ge/hybrid/model/node_item.h @@ -42,6 +42,8 @@ struct NodeItem { bool IsControlOp() const; + bool NeedInfershape() const; + void SetToDynamic(); std::string DebugString() const; @@ -73,6 +75,7 @@ struct NodeItem { std::map reuse_inputs; std::vector is_input_shape_static; + bool is_output_shape_static = true; int num_static_input_shapes = 0; }; } // namespace hybrid diff --git a/src/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/src/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index 50c8e899..71280649 100644 --- a/src/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/src/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -74,11 +74,11 @@ Status AiCoreNodeExecutor::GenNodeKey(const NodePtr &node, std::string &node_key GE_CHECK_NOTNULL(op_desc); // make sure unique, (op_id + input_shape) is unique - node_key = std::to_string(op_desc->GetId()) + "/"; + node_key = 
std::to_string(op_desc->GetId()) + "-"; node_key.append(std::to_string(op_desc->GetInputsSize())); auto input_descs = op_desc->GetAllInputsDescPtr(); for (auto &input_desc : input_descs) { - node_key.push_back('/'); + node_key.push_back('-'); auto &shape = input_desc->MutableShape(); auto num_dims = shape.GetDimNum(); if (num_dims == 0) { @@ -86,7 +86,7 @@ Status AiCoreNodeExecutor::GenNodeKey(const NodePtr &node, std::string &node_key } // scalar for (std::size_t i = 0; i < num_dims - 1; i++) { node_key.append(std::to_string(shape.GetDim(i))); - node_key.push_back(','); + node_key.push_back('_'); } node_key.append(std::to_string(shape.GetDim(num_dims - 1))); } @@ -114,13 +114,15 @@ std::shared_ptr AiCoreNodeTaskRegistry::GetTask(const std::string &nod Status AiCoreNodeExecutor::CompileTask(const HybridModel &model, const NodePtr &node, shared_ptr &task) const { GE_CHECK_NOTNULL(node); + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); GELOGI("AiCoreNodeExecutor(%s) CompileTask Start.", node->GetName().c_str()); AiCoreNodeTaskRegistry ®istry = AiCoreNodeTaskRegistry::GetInstance(); - std::string node_key; - GE_CHK_STATUS_RET(GenNodeKey(node, node_key), "GenNodeKey failed, op name = %s.", node->GetName().c_str()); + std::string shape_key; + GE_CHK_STATUS_RET(GenNodeKey(node, shape_key), "GenNodeKey failed, op name = %s.", node->GetName().c_str()); - node_key = std::to_string(model.GetModelId()) + "/" + node_key; + auto node_key = std::to_string(model.GetModelId()) + "/" + shape_key; GELOGD("NodeKey for %s = %s", node->GetName().c_str(), node_key.c_str()); task = registry.GetTask(node_key); if (task != nullptr) { @@ -129,7 +131,10 @@ Status AiCoreNodeExecutor::CompileTask(const HybridModel &model, const NodePtr & } std::vector task_defs; - GE_CHK_STATUS_RET(compiler_->CompileOp(node, task_defs), "Compile op(%s) failed.", node->GetName().c_str()); + auto ori_node_name = node->GetName(); + op_desc->SetName(ori_node_name + "_" + shape_key); + 
GE_CHK_STATUS_RET(compiler_->CompileOp(node, task_defs), "Compile op(%s) failed.", ori_node_name.c_str()); + op_desc->SetName(ori_node_name); GELOGD("successfully generated task_defs: %s", node->GetName().c_str()); AiCoreTaskBuilder builder(node->GetOpDesc(), task_defs); diff --git a/src/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/src/ge/hybrid/node_executor/aicore/aicore_op_task.cc index f5a4af83..9ec0cc22 100644 --- a/src/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/src/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -284,6 +284,14 @@ Status AtomicAddrCleanOpTask::InitAtomicAddrCleanIndices(const OpDesc &op_desc) std::string AtomicAddrCleanOpTask::GetKeyForOpParamSize() const { return kAttrAtomicOpParamSize; } +Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { + GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); + GE_CHK_STATUS_RET(OpAtomicCalculate(*node, tiling_info), "Failed calc tiling data of node %s.", + node->GetName().c_str()); + GELOGD("[%s] Done invoking OpAtomicCalculate successfully.", node->GetName().c_str()); + return SUCCESS; +} + Status AtomicAddrCleanOpTask::UpdateArgs(TaskContext &task_context) { // refresh atomic output addr int index = 0; diff --git a/src/ge/hybrid/node_executor/aicore/aicore_op_task.h b/src/ge/hybrid/node_executor/aicore/aicore_op_task.h index 74876588..41ab0d79 100644 --- a/src/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/src/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -75,6 +75,7 @@ class AtomicAddrCleanOpTask : public AiCoreOpTask { protected: std::string GetKeyForOpParamSize() const override; + Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info) override; private: Status InitAtomicAddrCleanIndices(const OpDesc &op_desc); diff --git a/src/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc b/src/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc index 9119bebb..588f179d 100644 --- 
a/src/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc +++ b/src/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc @@ -54,6 +54,8 @@ Status AiCoreTaskCompiler::CompileOp(const NodePtr &node, std::vector output_offsets(op_desc->GetOutputsSize(), kMemBase); op_desc->SetInputOffset(input_offsets); op_desc->SetOutputOffset(output_offsets); + std::vector workspaces(op_desc->GetWorkspaceBytes().size(), kMemBase); + op_desc->SetWorkspace(std::move(workspaces)); GE_CHK_STATUS_RET_NOLOG(DoGenerateTask(*aic_kernel_store_, *node, tasks)); GELOGD("successfully generated task: %s", node->GetName().c_str()); GELOGI("AiCoreTaskCompiler(%s) CompileOp End.", node->GetName().c_str()); diff --git a/src/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/src/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc index afa53724..2e1893f2 100644 --- a/src/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/src/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -49,19 +49,12 @@ Status KnownNodeTask::ExecuteAsync(TaskContext &context, std::function d rtError_t rt_ret; GELOGI("rtModelExecute start."); - rt_ret = rtModelExecute(davinci_model_->GetRtModelHandle(), davinci_model_->GetRtModelStream(), 0); + rt_ret = rtModelExecute(davinci_model_->GetRtModelHandle(), context.GetStream(), 0); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtModelExecute error, ret: Ox%X", rt_ret); return FAILED;); GELOGI("rtModelExecute end"); - GELOGI("rtStreamSynchronize start."); - rt_ret = rtStreamSynchronize(davinci_model_->GetRtModelStream()); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtStreamSynchronize error, ret: Ox%X", rt_ret); - return FAILED;); - GELOGI("rtStreamSynchronize end."); - context.RegisterCallback(done_callback); GELOGI("[%s] KnownNodeTask::ExecuteAsync success.", context.GetNodeName()); - return SUCCESS; } @@ -88,7 +81,8 @@ Status KnownNodeTask::UpdateArgs(TaskContext &context) { 
GE_CHK_STATUS_RET(davinci_model_->UpdateKnownNodeArgs(inputs, outputs), "known node task update known node args failed."); - GELOGI("[%s] KnownNodeExecutor::UpdateArgs success.", context.GetNodeName()); + GELOGI("[%s] KnownNodeExecutor::UpdateArgs success, task_size = %d:", context.GetNodeName(), + davinci_model_->GetTaskList().size()); return SUCCESS; } @@ -105,8 +99,14 @@ Status KnownNodeTask::Init(TaskContext &context) { // allocate mem base void *buffer = nullptr; if (davinci_model_->TotalMemSize() != 0) { - GE_CHK_STATUS_RET(context.AllocateWorkspace(davinci_model_->TotalMemSize(), &buffer), - "known node task allocate workspace failed."); + GE_CHK_STATUS_RET( + context.AllocateWorkspace(davinci_model_->TotalMemSize(), &buffer, davinci_model_->GetRuntimeParam().mem_base), + "known node task allocate workspace failed."); + bool addr_not_changed = false; + if (davinci_model_->GetRuntimeParam().mem_base == buffer) { + addr_not_changed = true; + } + davinci_model_->SetKnownNodeAddrNotChanged(addr_not_changed); // update mem base davinci_model_->UpdateMemBase(static_cast(buffer)); GELOGI("KnownNodeTask::Init mem base is %p, size %u.", davinci_model_->GetRuntimeParam().mem_base, @@ -126,7 +126,6 @@ Status KnownNodeTask::Init(TaskContext &context) { Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { GELOGI("[%s] KnownNodeExecutor::PrepareTask in.", context.GetNodeName()); - GE_CHK_STATUS_RET(task.Init(context), "known node init davinci model failed."); GE_CHK_STATUS_RET(task.UpdateArgs(context), "known node task update args failed."); diff --git a/src/ge/hybrid/node_executor/controlop/control_op_executor.cc b/src/ge/hybrid/node_executor/controlop/control_op_executor.cc index 1f18db3d..aee7fb77 100644 --- a/src/ge/hybrid/node_executor/controlop/control_op_executor.cc +++ b/src/ge/hybrid/node_executor/controlop/control_op_executor.cc @@ -27,7 +27,7 @@ Status ControlOpNodeTask::ExecuteSubgraph(const GraphItem *subgraph, TaskContext const 
std::function &done_callback) { GELOGD("[%s] Start to execute subgraph.", subgraph->GetName().c_str()); auto execution_context = const_cast(task_context.GetExecutionContext()); - auto executor = MakeShared(subgraph, execution_context, task_context.IsForceInferShape()); + auto executor = MakeShared(subgraph, execution_context); GE_CHECK_NOTNULL(executor); GE_CHK_STATUS_RET(executor->ExecuteAsync(task_context), "[%s] Failed to execute partitioned call.", subgraph->GetName().c_str()); @@ -186,13 +186,34 @@ Status WhileOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::fun return INTERNAL_ERROR; } - // graph build can not set accurate flag unknown_shape_status by now. - // Treating all nodes in while scope as unknown shape. - task_context.SetForceInferShape(true); + bool is_continue = false; + GE_CHK_STATUS_RET(ExecuteOneLoop(task_context, is_continue), "[%s] Failed to execute iteration 0.", + task_context.GetNodeName()); + if (!is_continue) { + for (int i = 0; i < task_context.NumInputs(); ++i) { + auto input_tensor = task_context.GetInput(i); + auto input_tensor_desc = task_context.GetInputDesc(i); + auto output_tensor_desc = task_context.MutableOutputDesc(i); + GE_CHECK_NOTNULL(input_tensor); + GE_CHECK_NOTNULL(input_tensor_desc); + GE_CHECK_NOTNULL(output_tensor_desc); + GE_CHK_STATUS_RET_NOLOG(task_context.SetOutput(i, *input_tensor)); + *output_tensor_desc = *input_tensor_desc; + } - int iteration = 0; + return SUCCESS; + } + + // backup original input tensor desc + std::vector ori_input_desc; + for (int i = 0; i < task_context.NumInputs(); ++i) { + auto tensor_desc = task_context.GetInputDesc(i); + GE_CHECK_NOTNULL(tensor_desc); + ori_input_desc.emplace_back(*tensor_desc); + } + + int iteration = 1; while (true) { - bool is_continue = false; GELOGD("[%s] Start to execute, iteration = %d", task_context.GetNodeName(), iteration); GE_CHK_STATUS_RET(ExecuteOneLoop(task_context, is_continue), "[%s] Failed to execute iteration %d.", 
task_context.GetNodeName(), iteration); @@ -205,6 +226,16 @@ Status WhileOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::fun ++iteration; } + for (int i = 0; i < task_context.NumInputs(); ++i) { + auto input_tensor = task_context.GetInput(i); + auto tensor_desc = task_context.MutableInputDesc(i); + GE_CHECK_NOTNULL(input_tensor); + GE_CHECK_NOTNULL(tensor_desc); + // restore original input tensor desc + *tensor_desc = std::move(ori_input_desc[i]); + GE_CHK_STATUS_RET_NOLOG(task_context.SetOutput(i, *input_tensor)); + } + return SUCCESS; } @@ -268,11 +299,6 @@ Status WhileOpNodeTask::ExecuteOneLoop(TaskContext &task_context, bool &is_conti GE_CHK_STATUS_RET(ExecuteCond(task_context, is_continue), "[%s] Failed to execute cond-subgraph", task_context.GetNodeName()); if (!is_continue) { - for (int i = 0; i < task_context.NumInputs(); ++i) { - auto input_tensor = task_context.GetInput(i); - GE_CHECK_NOTNULL(input_tensor); - task_context.SetOutput(i, *input_tensor); - } return SUCCESS; } diff --git a/src/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/src/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index e86c0cb0..f4fb7530 100644 --- a/src/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/src/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -153,7 +153,7 @@ Status HcclNodeExecutor::ExecuteTask(NodeTask &task, TaskContext &context, } Status HcclNodeExecutor::Initialize() { - std::string file_name = "libhccl.so"; + std::string file_name = "libhcom_graph_adaptor.so"; std::string path = PluginManager::GetPath(); path.append(file_name); string canonical_path = RealPath(path.c_str()); diff --git a/src/ge/hybrid/node_executor/hostaicpu/host_aicpu_node_executor.cc b/src/ge/hybrid/node_executor/hostaicpu/host_aicpu_node_executor.cc new file mode 100644 index 00000000..36336f8c --- /dev/null +++ b/src/ge/hybrid/node_executor/hostaicpu/host_aicpu_node_executor.cc @@ -0,0 +1,198 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "host_aicpu_engine/ops_kernel_store/op/op_factory.h" +#include "hybrid/node_executor/hostaicpu/host_aicpu_node_executor.h" +#include "graph/passes/folding_pass.h" +#include "hybrid/model/hybrid_model.h" +#include "inc/kernel_factory.h" +#include "ge_local_engine/engine/host_cpu_engine.h" + +namespace ge { +namespace hybrid { +REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::HOST_AICPU, HostAiCpuNodeExecutor); + +Status HostCpuNodeTaskBase::UpdateArgs(TaskContext &) { + // no need update args + return SUCCESS; +} + +Status HostCpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function done_callback) { + GELOGD("[%s] Start execute.", context.GetNodeName()); + + std::vector inputs; + std::vector outputs; + GE_CHK_STATUS_RET(ProcessInputs(context, inputs), "node:%s type:%s, process inputs failed.", node_->GetName().c_str(), + node_->GetType().c_str()); + GE_CHK_STATUS_RET(Execute(context, inputs, outputs), "node:%s type:%s, task execute failed.", + node_->GetName().c_str(), node_->GetType().c_str()); + GE_CHK_STATUS_RET(ProcessOutputs(context, outputs), "node:%s type:%s, process outputs failed.", + node_->GetName().c_str(), node_->GetType().c_str()); + + if (done_callback) { + GELOGD("[%s] Start invoke callback.", context.GetNodeName()); + done_callback(); + } + GELOGD("[%s] Done execute successfully.", context.GetNodeName()); + return SUCCESS; +} + +Status 
HostCpuNodeTaskBase::ProcessInputs(TaskContext &context, std::vector &inputs) { + // Constant & Variable + auto tensor = context.GetVariable(node_->GetName()); + if (tensor != nullptr) { + // Constant & Variable Op has and only has one output + GeTensorPtr input_ptr = MakeShared( + node_->GetOpDesc()->GetOutputDesc(0), reinterpret_cast(tensor->GetData()), tensor->GetSize()); + GE_CHECK_NOTNULL(input_ptr); + inputs.push_back(input_ptr); + return SUCCESS; + } + + int32_t input_num = context.NumInputs(); + for (auto i = 0; i < input_num; ++i) { + auto tensor_value = context.GetInput(i); + GE_CHECK_NOTNULL(tensor_value); + GeTensorPtr input_ptr = + MakeShared(node_->GetOpDesc()->GetInputDesc(i), + reinterpret_cast(tensor_value->GetData()), tensor_value->GetSize()); + if (input_ptr == nullptr) { + GELOGE(MEMALLOC_FAILED, "Make shared failed"); + return MEMALLOC_FAILED; + } + inputs.push_back(input_ptr); + } + return SUCCESS; +} + +Status HostCpuNodeTaskBase::ProcessOutputs(TaskContext &context, std::vector &outputs) { + int32_t output_num = context.NumOutputs(); + if (static_cast(output_num) != outputs.size()) { + GELOGE(INTERNAL_ERROR, "node %s type %s has %d output, but kernel compute only has %zu output.", + node_->GetName().c_str(), node_->GetType().c_str(), output_num, outputs.size()); + return INTERNAL_ERROR; + } + + // alloc output + GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); + + // copy data to output + for (auto i = 0; i < output_num; ++i) { + GeTensorPtr &tensor = outputs[i]; + GE_CHECK_NOTNULL(tensor); + auto tensor_data = tensor->GetData(); + auto tensor_value = context.MutableOutput(i); + GE_CHECK_NOTNULL(tensor_value); + if (tensor_data.GetSize() > tensor_value->GetSize()) { + GELOGE(INTERNAL_ERROR, "node:%s type:%s [%d]th compute data size=%zu, but context data size=%zu.", + node_->GetName().c_str(), node_->GetType().c_str(), i, tensor_data.GetSize(), tensor_value->GetSize()); + return INTERNAL_ERROR; + } + + GELOGI("node:%s type:%s [%d]th output 
data=%p, out size=%zu, data size=%zu.", node_->GetName().c_str(), + node_->GetType().c_str(), i, tensor_value->GetData(), tensor_value->GetSize(), tensor_data.GetSize()); + if (tensor_data.GetSize() > 0) { + GE_CHK_RT_RET(rtMemcpy(tensor_value->MutableData(), tensor_value->GetSize(), tensor_data.GetData(), + tensor_data.GetSize(), RT_MEMCPY_HOST_TO_HOST)); + } + GELOGI("node:%s type:%s [%d]th set data success, data size=%zu.", node_->GetName().c_str(), + node_->GetType().c_str(), i, tensor_data.GetSize()); + } + + return SUCCESS; +} + +Status CpuKernelNodeTask::Execute(TaskContext &context, const std::vector &inputs, + std::vector &outputs) { + std::vector const_inputs; + for (const auto &input : inputs) { + const_inputs.emplace_back(input); + } + return FoldingPass::RunOpKernel(node_, const_inputs, outputs); +} + +Status HostKernelNodeTask::Execute(TaskContext &context, const std::vector &inputs, + std::vector &outputs) { + auto kernel = KernelFactory::Instance().Create(node_->GetType()); + if (kernel == nullptr) { + GELOGE(UNSUPPORTED, "node %s type %s is not supported by host kernel.", node_->GetName().c_str(), + node_->GetType().c_str()); + return UNSUPPORTED; + } + + std::vector const_inputs; + for (const auto &input : inputs) { + const_inputs.emplace_back(input); + } + Status compute_ret = kernel->Compute(node_->GetOpDesc(), const_inputs, outputs); + if (compute_ret != SUCCESS) { + GELOGE(compute_ret, "node %s type %s compute failed or not imply.", node_->GetName().c_str(), + node_->GetType().c_str()); + return compute_ret; + } + + return SUCCESS; +} + +Status HostAiCpuNodeTask::Execute(TaskContext &context, const std::vector &inputs, + std::vector &outputs) { + RunContext run_context; + auto host_op = host_aicpu::OpFactory::Instance().CreateOp(*node_, run_context); + if (host_op == nullptr) { + GELOGE(UNSUPPORTED, "node %s type %s is not supported by host kernel.", node_->GetName().c_str(), + node_->GetType().c_str()); + return UNSUPPORTED; + } + + Status 
compute_ret = host_op->Compute(node_->GetOpDesc(), inputs, outputs); + if (compute_ret != SUCCESS) { + GELOGE(compute_ret, "node %s type %s compute failed or not imply.", node_->GetName().c_str(), + node_->GetType().c_str()); + return compute_ret; + } + + return SUCCESS; +} + +Status HostAiCpuNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { + return task.UpdateArgs(context); +} + +Status HostAiCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, + std::shared_ptr &task) const { + GE_CHECK_NOTNULL(node); + const std::string &name = node->GetName(); + const std::string &type = node->GetType(); + if (HostCpuEngine::GetInstance().CheckSupported(type)) { + GELOGI("create CpuKernelNodeTask for node %s, type %s.", name.c_str(), type.c_str()); + task = MakeShared(node); + GE_CHECK_NOTNULL(task); + } else if (KernelFactory::Instance().Create(type) != nullptr) { + GELOGI("create HostKernelNodeTask for node %s, type %s.", name.c_str(), type.c_str()); + task = MakeShared(node); + GE_CHECK_NOTNULL(task); + } else if (host_aicpu::OpFactory::Instance().CheckSupported(type)) { + GELOGI("create HostAiCpuNodeTask for node %s, type %s.", name.c_str(), type.c_str()); + task = MakeShared(node); + GE_CHECK_NOTNULL(task); + } else { + GELOGE(UNSUPPORTED, "node %s type %s is not support in HostAiCpuNodeExecutor now.", name.c_str(), type.c_str()); + return UNSUPPORTED; + } + return SUCCESS; +} +} // namespace hybrid +} // namespace ge \ No newline at end of file diff --git a/src/ge/hybrid/node_executor/hostaicpu/host_aicpu_node_executor.h b/src/ge/hybrid/node_executor/hostaicpu/host_aicpu_node_executor.h new file mode 100644 index 00000000..03075b9b --- /dev/null +++ b/src/ge/hybrid/node_executor/hostaicpu/host_aicpu_node_executor.h @@ -0,0 +1,82 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_HYBRID_KERNEL_HOST_AICPU_NODE_EXECUTOR_H_ +#define GE_HYBRID_KERNEL_HOST_AICPU_NODE_EXECUTOR_H_ + +#include "inc/kernel.h" +#include "hybrid/node_executor/node_executor.h" + +namespace ge { +namespace hybrid { +class HostCpuNodeTaskBase : public NodeTask { + public: + explicit HostCpuNodeTaskBase(const NodePtr &node) : node_(node) {} + ~HostCpuNodeTaskBase() = default; + virtual Status UpdateArgs(TaskContext &context); + virtual Status ExecuteAsync(TaskContext &context, std::function done_callback); + + protected: + NodePtr node_; + + private: + virtual Status Execute(TaskContext &context, const std::vector &inputs, + std::vector &outputs) = 0; + virtual Status ProcessInputs(TaskContext &context, std::vector &inputs); + virtual Status ProcessOutputs(TaskContext &context, std::vector &outputs); +}; + +class CpuKernelNodeTask : public HostCpuNodeTaskBase { + public: + explicit CpuKernelNodeTask(const NodePtr &node) : HostCpuNodeTaskBase(node) {} + ~CpuKernelNodeTask() = default; + + private: + Status Execute(TaskContext &context, const std::vector &inputs, + std::vector &outputs) override; +}; + +class HostKernelNodeTask : public HostCpuNodeTaskBase { + public: + explicit HostKernelNodeTask(const NodePtr &node) : HostCpuNodeTaskBase(node) {} + ~HostKernelNodeTask() = default; + + private: + Status Execute(TaskContext &context, const std::vector &inputs, + std::vector &outputs) override; +}; + +class HostAiCpuNodeTask : public HostCpuNodeTaskBase { + public: + explicit HostAiCpuNodeTask(const NodePtr &node) : 
HostCpuNodeTaskBase(node) {} + ~HostAiCpuNodeTask() = default; + + private: + Status Execute(TaskContext &context, const std::vector &inputs, + std::vector &outputs) override; + Status ProcessInputs(TaskContext &context, std::vector &inputs) override; +}; + +class HostAiCpuNodeExecutor : public NodeExecutor { + public: + Status PrepareTask(NodeTask &task, TaskContext &context) const override; + + virtual Status LoadTask(const HybridModel &model, const NodePtr &node, + std::shared_ptr &task) const override; +}; +} // namespace hybrid +} // namespace ge +#endif // GE_HYBRID_KERNEL_HOST_AICPU_NODE_EXECUTOR_H_ diff --git a/src/ge/hybrid/node_executor/hostcpu/ge_local_node_executor.cc b/src/ge/hybrid/node_executor/hostcpu/ge_local_node_executor.cc index d353dff1..7cd10a83 100644 --- a/src/ge/hybrid/node_executor/hostcpu/ge_local_node_executor.cc +++ b/src/ge/hybrid/node_executor/hostcpu/ge_local_node_executor.cc @@ -62,7 +62,7 @@ Status RefInputTask::RefOneByOne(TaskContext &context) { for (uint32_t out_index = 0; out_index < output_num; ++out_index) { auto input = context.GetInput(out_index); GE_CHECK_NOTNULL(input); - context.SetOutput(out_index, *input); + GE_CHK_STATUS_RET(context.SetOutput(out_index, *input)); GELOGD("node %s type %s output[%u] ref input[%u] addr=%p.", node_name_.c_str(), node_type_.c_str(), out_index, out_index, input->GetData()); } @@ -82,7 +82,7 @@ Status RefInputTask::RefByOrder(const std::vector &ref_order, TaskCont auto ref_input_index = ref_order[out_index]; auto input = context.GetInput(ref_input_index); GE_CHECK_NOTNULL(input); - context.SetOutput(out_index, *input); + GE_CHK_STATUS_RET(context.SetOutput(out_index, *input)); GELOGD("node %s type %s output[%d] ref input[%u] addr=%p.", node_name_.c_str(), node_type_.c_str(), out_index, ref_input_index, input->GetData()); } diff --git a/src/ge/ir_build/atc_ir_common.cc b/src/ge/ir_build/atc_ir_common.cc index 352e5dc2..d4402833 100644 --- a/src/ge/ir_build/atc_ir_common.cc +++ 
b/src/ge/ir_build/atc_ir_common.cc @@ -16,6 +16,7 @@ #include "atc_ir_common.h" #include "common/util/error_manager/error_manager.h" +#include "common/model_parser/graph_parser_util.h" #include "external/ge/ge_api_types.h" #include "framework/common/string_util.h" #include "framework/common/types.h" @@ -29,6 +30,9 @@ namespace ge { namespace { const int64_t kDynamicInputDim = -1; const int64_t kDynamicImageSizeNum = 2; +const size_t kMaxDynamicDimNum = 100; +const size_t kMaxNDDimNum = 4; +const size_t kMinNDDimNum = 1; // datatype/formats from user to GE, Unified to util interface file later const std::map kOutputTypeSupportDatatype = { {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}}; @@ -38,23 +42,9 @@ const std::set kBufferOptimizeSupportOption = {"l1_optimize", "l2_o // The function is incomplete. Currently, only l2_optimize, off_optimize is supported. const char *const kBufferOptimizeSupport = "only support l2_optimize, off_optimize"; const std::string IR_OPTION_OP_SELECT_IMPLMODE_DEFAULT = "high_performance"; -const char *const kInputShapeSample1 = "\"input_name1:n1,c1,h1,w1\""; -const char *const kInputShapeSample2 = "\"input_name1:1,3,224,224\""; -const char *const kSplitError1 = "size not equal to 2 split by \":\""; -const char *const kEmptyError = "can not be empty"; -const char *const kFloatNumError = "exist float number"; -const char *const kDigitError = "is not digit"; const char *const kCompressWeightError = "it must be appointed when appoint parameter[--optypelist_for_implmode]"; +const char *const kDynamicBatchSizeError = "It can only contains digit, \",\", \" \""; -vector SplitInputShape(const std::string &input_shape) { - vector shape_pair_vec; - size_t pos = input_shape.rfind(":"); - if (pos != std::string::npos) { - shape_pair_vec.emplace_back(input_shape.substr(0, pos)); - shape_pair_vec.emplace_back(input_shape.substr(pos + 1, input_shape.size() - pos)); - } - return shape_pair_vec; -} } // namespace bool 
CheckDynamicBatchSizeInputShapeValid(unordered_map> shape_map, @@ -89,9 +79,10 @@ bool CheckDynamicBatchSizeInputShapeValid(unordered_map> for (char c : dynamic_batch_size) { if (!isdigit(c) && (c != ',') && (c != ' ')) { - ErrorManager::GetInstance().ATCReportErrMessage("E10033", {"value"}, {dynamic_batch_size}); - GELOGE(ge::PARAM_INVALID, "Input parameter[--dynamic_batch_size]'s value[%s] is invalid.", - dynamic_batch_size.c_str()); + ErrorManager::GetInstance().ATCReportErrMessage("E10033", {"value", "reason"}, + {dynamic_batch_size, kDynamicBatchSizeError}); + GELOGE(ge::PARAM_INVALID, "Input parameter[--dynamic_batch_size]'s value[%s] is invalid. reason: %s", + dynamic_batch_size.c_str(), kDynamicBatchSizeError); return false; } } @@ -111,7 +102,7 @@ bool CheckDynamicImagesizeInputShapeValid(unordered_map> if (std::count(shape.begin(), shape.end(), kDynamicInputDim) > 0) { ErrorManager::GetInstance().ATCReportErrMessage("E10019"); GELOGE(ge::PARAM_INVALID, - "--input_shape's shape is invalid, only height or width can be -1 when set --dynamic_image_size."); + "--input_shape's shape is invalid, only height and width can be -1 when set --dynamic_image_size."); return false; } continue; @@ -137,21 +128,18 @@ bool CheckDynamicImagesizeInputShapeValid(unordered_map> } else { ErrorManager::GetInstance().ATCReportErrMessage("E10019"); GELOGE(ge::PARAM_INVALID, - "--input_shape's shape is invalid, only height or width can be -1 when set --dynamic_image_size."); + "--input_shape's shape is invalid, only height and width can be -1 when set --dynamic_image_size."); return false; } } if (size == 0) { ErrorManager::GetInstance().ATCReportErrMessage("E10019"); GELOGE(ge::PARAM_INVALID, - "--input_shape's shape is invalid, only height or width can be -1 when set --dynamic_image_size."); + "--input_shape's shape is invalid, only height and width can be -1 when set --dynamic_image_size."); return false; } - if (dynamic_image_size.back() == ';') { - 
dynamic_image_size.erase(dynamic_image_size.end() - 1); - } - + EraseEndSemicolon(dynamic_image_size); // Different parameter sets are split string by ';' std::vector split_set = StringUtils::Split(dynamic_image_size, ';'); // Different dimensions are split by ',' @@ -172,17 +160,106 @@ bool CheckDynamicImagesizeInputShapeValid(unordered_map> return true; } -Status CheckDynamicBatchSizeOrImageSizeParamValid(std::string &dynamic_batch_size, std::string &dynamic_image_size, - const std::string input_shape, const std::string input_format, - bool &is_dynamic_input) { - if (!dynamic_batch_size.empty() && !dynamic_image_size.empty()) { - ErrorManager::GetInstance().ATCReportErrMessage("E10009", {"parameter0", "parameter1"}, - {"dynamic_batch_size", "dynamic_image_size"}); - GELOGE(ge::PARAM_INVALID, "dynamic_batch_size and dynamic_image_size can not both exist"); +bool CheckDynamicDimsInputShapeValid(const unordered_map> &shape_map, string input_format, + string &dynamic_dims) { + if (input_format != "ND") { + ErrorManager::GetInstance().ATCReportErrMessage( + "E10001", {"parameter", "value", "reason"}, + {"--input_format", input_format.c_str(), "input_format must be ND when set dynamic_dims"}); + GELOGE(ge::PARAM_INVALID, "input_format must be ND when set dynamic_dims."); + return false; + } + + int32_t dynamic_dim = 0; + for (auto &info_shapes : shape_map) { + auto &shapes = info_shapes.second; + if (shapes.size() > kMaxNDDimNum || shapes.size() < kMinNDDimNum) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E10001", {"parameter", "value", "reason"}, + {"--input_shape's dim", std::to_string(shapes.size()), "Dim num must within [1, 4] when set dynamic_dims"}); + GELOGE(ge::PARAM_INVALID, "Dim num must within [%zu, %zu] when set dynamic_dims.", kMinNDDimNum, kMaxNDDimNum); + return false; + } + int tmp = std::count(shapes.begin(), shapes.end(), kDynamicInputDim); + if (dynamic_dim != 0 && dynamic_dim != tmp) { + ErrorManager::GetInstance().ATCReportErrMessage( + 
"E10001", {"parameter", "value", "reason"}, + {"--input_shape's -1 num", std::to_string(tmp), "Every set's num of -1 must be same"}); + GELOGE(ge::PARAM_INVALID, "input_shape's shape is invalid, every set's num of -1 must be same."); + return false; + } + dynamic_dim = tmp; + } + if (dynamic_dim == 0) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E10001", {"parameter", "value", "reason"}, + {"--input_shape's dynamic dim num", "0", "at least one dim should be -1 when set dynamic_dims"}); + GELOGE(ge::PARAM_INVALID, "input_shape's shape is invalid, at least one dim should be -1 when set dynamic_dims."); + return false; + } + + if (!CheckAndParseDynamicDims(dynamic_dim, dynamic_dims)) { + GELOGE(ge::PARAM_INVALID, "Check and parse dynamic dims: %s failed.", dynamic_dims.c_str()); + return false; + } + + return true; +} + +bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims) { + EraseEndSemicolon(dynamic_dims); + if (dynamic_dims.empty()) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E10001", {"parameter", "value", "reason"}, + {"--dynamic_dims", dynamic_dims.c_str(), "dynamic_dims can not be empty"}); + GELOGE(ge::PARAM_INVALID, "dynamic_dims can not be empty."); + return false; + } + // Different parameter sets are split by ';' + vector split_set = StringUtils::Split(dynamic_dims, ';'); + if (split_set.size() > kMaxDynamicDimNum) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E10042", {"parameter", "reason"}, {"dynamic_dims", "dynamic_dims's num of parameter set can not exceed 100"}); + GELOGE(ge::PARAM_INVALID, "dynamic_dims's num of parameter set can not exceed %zu.", kMaxDynamicDimNum); + return false; + } + for (auto split_dim : split_set) { + vector one_set = StringUtils::Split(split_dim, ','); + if (one_set.size() != static_cast(dynamic_dim_num)) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E10001", {"parameter", "value", "reason"}, + {"--dynamic_dims's parameter num of each set", 
std::to_string(one_set.size()), + "must be same as input_shape's num of -1"}); + GELOGE(ge::PARAM_INVALID, "dynamic_dims's parameter num of each set must be same as input_shape's num of -1."); + return false; + } + for (auto dim : one_set) { + for (auto c : dim) { + if (!isdigit(c)) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E10001", {"parameter", "value", "reason"}, + {"--dynamic_dims's parameter", dim.c_str(), "must be positive integer"}); + GELOGE(ge::PARAM_INVALID, "dynamic_dims's parameter must be positive integer."); + return false; + } + } + } + } + return true; +} + +Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_image_size, string &dynamic_dims, + const string input_shape, const string input_format, bool &is_dynamic_input) { + int32_t param_size = static_cast(!dynamic_batch_size.empty()) + + static_cast(!dynamic_image_size.empty()) + static_cast(!dynamic_dims.empty()); + if (param_size > 1) { + ErrorManager::GetInstance().ATCReportErrMessage("E10009", {"parameter0", "parameter1", "parameter2"}, + {"dynamic_batch_size", "dynamic_image_size", "dynamic_dims"}); + GELOGE(ge::PARAM_INVALID, "dynamic_batch_size, dynamic_image_size and dynamic_dims can only be set one"); return ge::PARAM_INVALID; } - if (dynamic_batch_size.empty() && dynamic_image_size.empty()) { + if (param_size == 0) { return ge::SUCCESS; } @@ -191,7 +268,7 @@ Status CheckDynamicBatchSizeOrImageSizeParamValid(std::string &dynamic_batch_siz is_dynamic_input = true; if (input_shape.empty()) { ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {"input_shape"}); - GELOGE(ge::PARAM_INVALID, "The input_shape can not be empty in dynamic batchsize scenario."); + GELOGE(ge::PARAM_INVALID, "The input_shape can not be empty in dynamic input size scenario."); return ge::PARAM_INVALID; } @@ -213,91 +290,15 @@ Status CheckDynamicBatchSizeOrImageSizeParamValid(std::string &dynamic_batch_siz return ge::PARAM_INVALID; } } - return ge::SUCCESS; 
-} - -bool ParseInputShape(const string &input_shape, unordered_map> &shape_map, - vector>> &user_shape_map, bool is_dynamic_input) { - vector shape_vec = StringUtils::Split(input_shape, ';'); - const int DEFAULT_SHAPE_PAIR_SIZE = 2; - for (const auto &shape : shape_vec) { - vector shape_pair_vec = SplitInputShape(shape); - if (shape_pair_vec.size() != DEFAULT_SHAPE_PAIR_SIZE) { - ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"}, - {shape, kSplitError1, kInputShapeSample1}); - GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.", - shape.c_str(), kSplitError1, kInputShapeSample1); - return false; - } - if (shape_pair_vec[1].empty()) { - ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"}, - {shape, kEmptyError, kInputShapeSample1}); - GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.", - shape.c_str(), kEmptyError, kInputShapeSample1); - return false; - } - - vector shape_value_strs = StringUtils::Split(shape_pair_vec[1], ','); - vector shape_values; - for (auto &shape_value_str : shape_value_strs) { - // stoul: The method may throw an exception: invalid_argument/out_of_range - if (std::string::npos != shape_value_str.find('.')) { - ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"}, - {shape, kFloatNumError, kInputShapeSample2}); - GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.", - shape.c_str(), kFloatNumError, kInputShapeSample2); - return false; - } - long left_result = 0; - try { - left_result = stol(StringUtils::Trim(shape_value_str)); - if (!shape_value_str.empty() && (shape_value_str.front() == '-')) { - // The value maybe dynamic shape [-1], need substr it and verify isdigit. 
- shape_value_str = shape_value_str.substr(1); - } - for (char c : shape_value_str) { - if (!isdigit(c)) { - ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"}, - {shape, kDigitError, kInputShapeSample2}); - GELOGE(PARAM_INVALID, "--input_shape's shape value[%s] is not digit", shape_value_str.c_str()); - return false; - } - } - } catch (const std::out_of_range &) { - ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, - {"input_shape", shape_value_str}); - GELOGW("Input parameter[--input_shape]’s value[%s] cause out of range execption!", shape_value_str.c_str()); - return false; - } catch (const std::invalid_argument &) { - ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, - {"input_shape", shape_value_str}); - GELOGW("Input parameter[--input_shape]’s value[%s] cause invalid argument!", shape_value_str.c_str()); - return false; - } catch (...) { - ErrorManager::GetInstance().ATCReportErrMessage("E10015", {"parameter", "value"}, - {"input_shape", shape_value_str}); - GELOGW("Input parameter[--input_shape]’s value[%s] cause unkown execption!", shape_value_str.c_str()); - return false; - } - int64_t result = left_result; - // - 1 is not currently supported - if (!is_dynamic_input && result <= 0) { - ErrorManager::GetInstance().ATCReportErrMessage("E10011", {"shape", "result"}, {shape, std::to_string(result)}); - GELOGW( - "Input parameter[--input_shape]’s shape value[%s] is invalid, " - "expect positive integer, but value is %ld.", - shape.c_str(), result); - return false; - } - shape_values.push_back(result); + if (!dynamic_dims.empty()) { + if (!CheckDynamicDimsInputShapeValid(shape_map, input_format, dynamic_dims)) { + GELOGE(ge::PARAM_INVALID, "Check dynamic dims: %s of input shape: %s failed.", dynamic_dims.c_str(), + input_shape.c_str()); + return ge::PARAM_INVALID; } - - shape_map.emplace(make_pair(StringUtils::Trim(shape_pair_vec[0]), shape_values)); - 
user_shape_map.push_back(make_pair(StringUtils::Trim(shape_pair_vec[0]), shape_values)); } - - return true; + return ge::SUCCESS; } Status CheckOutputTypeParamValid(const std::string output_type) { @@ -427,4 +428,13 @@ void PrintOptionMap(std::map &options, std::string tip GELOGI("%s set successfully, key=%s, value=%s", tips.c_str(), key.c_str(), option_name.c_str()); } } + +void EraseEndSemicolon(string ¶m) { + if (param.empty()) { + return; + } + if (param.back() == ';') { + param.erase(param.end() - 1); + } +} } // namespace ge diff --git a/src/ge/ir_build/atc_ir_common.h b/src/ge/ir_build/atc_ir_common.h index 8a578767..e4d3103b 100644 --- a/src/ge/ir_build/atc_ir_common.h +++ b/src/ge/ir_build/atc_ir_common.h @@ -49,12 +49,14 @@ bool CheckDynamicBatchSizeInputShapeValid(unordered_map> bool CheckDynamicImagesizeInputShapeValid(unordered_map> shape_map, const std::string input_format, std::string &dynamic_image_size); -Status CheckDynamicBatchSizeOrImageSizeParamValid(std::string &dynamic_batch_size, std::string &dynamic_image_size, - const std::string input_shape, const std::string input_format, - bool &is_dynamic_input); +bool CheckDynamicDimsInputShapeValid(const std::unordered_map> &shape_map, + std::string input_format, std::string &dynamic_dims); -bool ParseInputShape(const std::string &input_shape, std::unordered_map> &shape_map, - std::vector>> &user_shape_map, bool is_dynamic_input = false); +bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims); + +Status CheckDynamicInputParamValid(std::string &dynamic_batch_size, std::string &dynamic_image_size, + std::string &dynamic_dims, const std::string input_shape, + const std::string input_format, bool &is_dynamic_input); Status CheckOutputTypeParamValid(const std::string output_type); Status CheckBufferOptimizeParamValid(const std::string buffer_optimize); @@ -65,5 +67,6 @@ Status CheckDisableReuseMemoryParamValid(const std::string disable_reuse_memory) Status 
CheckEnableSingleStreamParamValid(const std::string enable_single_stream); Status CheckImplmodeParamValid(const std::string &optypelist_for_implmode, std::string &op_select_implmode); void PrintOptionMap(std::map &options, std::string tips); +void EraseEndSemicolon(std::string ¶m); } // namespace ge #endif // FRAMEWORK_DOMI_ATC_IR_COMMON_H_ diff --git a/src/ge/ir_build/ge_ir_build.cc b/src/ge/ir_build/ge_ir_build.cc index a64591da..22163ca5 100644 --- a/src/ge/ir_build/ge_ir_build.cc +++ b/src/ge/ir_build/ge_ir_build.cc @@ -26,6 +26,7 @@ #include "framework/common/util.h" #include "framework/omg/omg_inner_types.h" #include "framework/omg/omg_inner_types.h" +#include "common/model_parser/graph_parser_util.h" #include "ge/ge_api_types.h" #include "generator/ge_generator.h" #include "graph/compute_graph.h" @@ -151,6 +152,7 @@ class Impl { GetContext().is_dynamic_input = false; GetContext().dynamic_batch_size.clear(); GetContext().dynamic_image_size.clear(); + GetContext().dynamic_dims.clear(); }; ~Impl() { (void)generator_.Finalize(); }; graphStatus CheckOptions(const std::map &options); @@ -200,17 +202,20 @@ graphStatus Impl::Init(const std::map &options) { string dynamic_image_size = options_.find(ge::ir_option::DYNAMIC_IMAGE_SIZE) == options_.end() ? "" : options_[ge::ir_option::DYNAMIC_IMAGE_SIZE]; + string dynamic_dims = + options_.find(ge::ir_option::DYNAMIC_DIMS) == options_.end() ? 
"" : options_[ge::ir_option::DYNAMIC_DIMS]; - auto status = CheckDynamicBatchSizeOrImageSizeParamValid(dynamic_batch_size, dynamic_image_size, input_shape, - input_format, is_dynamic_input_); + auto status = CheckDynamicInputParamValid(dynamic_batch_size, dynamic_image_size, dynamic_dims, input_shape, + input_format, is_dynamic_input_); if (status != ge::SUCCESS) { - GELOGE(GRAPH_PARAM_INVALID, "check dynamic batch size or image size failed!"); + GELOGE(GRAPH_PARAM_INVALID, "Check dynamic input size failed!"); return GRAPH_PARAM_INVALID; } - GELOGD("user input dynamic_batch_size:%s,dynamic_image_size:%s", dynamic_batch_size.c_str(), - dynamic_image_size.c_str()); + GELOGD("User input dynamic_batch_size:%s, dynamic_image_size:%s, dynamic_dims:%s.", dynamic_batch_size.c_str(), + dynamic_image_size.c_str(), dynamic_dims.c_str()); GetContext().dynamic_batch_size = dynamic_batch_size; GetContext().dynamic_image_size = dynamic_image_size; + GetContext().dynamic_dims = dynamic_dims; // check output_type std::string output_type = options_.find(ge::ir_option::OUTPUT_TYPE) == options_.end() ? 
"" : options_[ge::ir_option::OUTPUT_TYPE]; @@ -243,11 +248,13 @@ graphStatus Impl::Init(const std::map &options) { graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vector &inputs) { auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); GE_CHECK_NOTNULL(compute_graph); + int64_t index = 0; for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { GE_CHECK_NOTNULL(input_node); ge::OpDescPtr op = input_node->GetOpDesc(); GE_CHECK_NOTNULL(op); if (op->GetType() == DATA) { + AttrUtils::SetInt(op, ATTR_NAME_INDEX, index++); GELOGI("Data op inputDesc size is: %zu", op->GetAllInputsDesc().size()); ge::GeTensorDesc tensor = op->GetInputDesc(0); string data_op_name = op->GetName(); @@ -259,7 +266,7 @@ graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vector &fileList, string &c void LoadModelParserLib(std::string caffe_parser_path) { if (FLAGS_framework == static_cast(domi::TENSORFLOW)) { - void *tf_handle = dlopen("libfmk_tensorflow_parser.so", RTLD_NOW | RTLD_GLOBAL); + void *tf_handle = dlopen("libfmk_parser.so", RTLD_NOW | RTLD_GLOBAL); if (tf_handle == nullptr) { - GELOGW("dlopen fmk library [libfmk_tensorflow_parser.so] failed."); + GELOGW("dlopen fmk library [libfmk_parser.so] failed."); return; } - GELOGI("plugin load libfmk_tensorflow_parser.so success."); + GELOGI("plugin load libfmk_parser.so success."); } else if (FLAGS_framework == static_cast(domi::CAFFE)) { // What we are dealing with here is that the user modifies the caffe.proto scenario. // If no lib_Caffe_Parser.so is found under the plugin path, use the default lib_Caffe_Parser.so path. @@ -596,17 +602,17 @@ void LoadModelParserLib(std::string caffe_parser_path) { return; } GELOGI("plugin load %s success.", caffe_parser_path.c_str()); - // According to the dependency, the Caffe parsing module of the framework is loaded here( libfmk_caffe_parser.so). + // According to the dependency, the Caffe parsing module of the framework is loaded here( libfmk_parser.so). 
// (depend on the lib_caffe_parser.so) - void *fmk_handle = dlopen("libfmk_caffe_parser.so", RTLD_NOW | RTLD_GLOBAL); + void *fmk_handle = dlopen("libfmk_parser.so", RTLD_NOW | RTLD_GLOBAL); if (fmk_handle == nullptr) { - GELOGW("dlopen fmk library [libfmk_caffe_parser.so] failed."); + GELOGW("dlopen fmk library [libfmk_parser.so] failed."); if (dlclose(handle) != 0) { GELOGW("dlclose lib_caffe_parser.so failed."); } return; } - GELOGI("plugin load libfmk_caffe_parser.so success."); + GELOGI("plugin load libfmk_parser.so success."); } else if (FLAGS_framework == static_cast(domi::ONNX)) { void *handle = dlopen("libfmk_onnx_parser.so", RTLD_NOW | RTLD_GLOBAL); if (handle == nullptr) { @@ -657,8 +663,10 @@ void LoadCustomOpLib(bool need_load_ops_plugin) { std::vector registrationDatas = OpRegistry::Instance()->registrationDatas; for (OpRegistrationData reg_data : registrationDatas) { - (void)ge::OpRegistrationTbe::Instance()->Finalize(reg_data); - (void)OpRegistry::Instance()->Register(reg_data); + if (reg_data.GetFrameworkType() == static_cast(FLAGS_framework)) { + (void)ge::OpRegistrationTbe::Instance()->Finalize(reg_data); + (void)OpRegistry::Instance()->Register(reg_data); + } } } @@ -780,12 +788,12 @@ static Status ConvertModelToJson(int fwk_type, const string &model_file, const s } if (FLAGS_dump_mode == "0") { - // Caffe or tf model to json depend on lib_caffe_parser.so or libfmk_tensorflow_parser.so. + // Caffe or tf model to json depend on lib_caffe_parser.so or libfmk_parser.so. LoadCustomOpLib(false); ret = ge::ConvertFwkModelToJson((domi::FrameworkType)fwk_type, model_file.c_str(), json_file.c_str()); return ret; } else if (FLAGS_dump_mode == "1") { - // Caffe or tf model to json depend on lib_caffe_parser.so or libfmk_tensorflow_parser.so and ops plugin so. + // Caffe or tf model to json depend on lib_caffe_parser.so or libfmk_parser.so and ops plugin so. 
LoadCustomOpLib(true); ret = GenerateInfershapeJson(); return ret; @@ -886,7 +894,7 @@ domi::Status GenerateModel(std::map &options, std::string output (void)ge::GELib::GetInstance()->Finalize(); return domi::FAILED; } - if (SetOutputNodeInfo(graph, FLAGS_output_type, "") != domi::SUCCESS) { + if (ge::SetOutputNodeInfo(graph, FLAGS_output_type, "") != domi::SUCCESS) { DOMI_LOGE("Set output node info fail."); (void)ge_generator.Finalize(); (void)ge::GELib::GetInstance()->Finalize(); @@ -941,12 +949,6 @@ domi::Status GenerateSingleOp(const std::string &json_file_path) { // need to be changed when ge.ini plan is done SetEnvForSingleOp(options); - vector build_params; - if (ge::SingleOpParser::ParseSingleOpList(json_file_path, build_params) != ge::SUCCESS) { - DOMI_LOGE("parse single op json file failed"); - return domi::FAILED; - } - auto ret = ge::GELib::Initialize(options); if (ret != ge::SUCCESS) { DOMI_LOGE("GE initialize failed!"); @@ -961,6 +963,14 @@ domi::Status GenerateSingleOp(const std::string &json_file_path) { return domi::FAILED; } + vector build_params; + if (ge::SingleOpParser::ParseSingleOpList(json_file_path, build_params) != ge::SUCCESS) { + DOMI_LOGE("parse single op json file failed"); + (void)generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + int index = 0; for (auto ¶m : build_params) { string output_path; @@ -1057,7 +1067,7 @@ domi::Status GenerateOmModel() { options.insert(std::pair(string(ge::ENABLE_SINGLE_STREAM), FLAGS_enable_single_stream)); - SetDynamicBatchSizeOrImagesizeOptions(); + SetDynamicInputSizeOptions(); if (!FLAGS_save_original_model.empty()) { options.insert(std::pair(string(ge::SAVE_ORIGINAL_MODEL), FLAGS_save_original_model)); @@ -1137,7 +1147,7 @@ int init(int argc, char *argv[]) { GFlagUtils::InitGFlag(argc, argv); // set log level int ret = -1; - const std::set log_level = {"default", "null", "debug", "info", "warning", "error"}; + const std::set log_level = {"null", "debug", 
"info", "warning", "error"}; if (log_level.count(FLAGS_log) == 0) { std::cout << "E10010: invalid value for --log:" << FLAGS_log << ", only support debug, info, warning, error, null" << std::endl; @@ -1211,6 +1221,7 @@ int main(int argc, char *argv[]) { return ret; } else { std::cout << "ATC run success, welcome to the next use." << std::endl; + (void)ErrorManager::GetInstance().OutputMessage(STDOUT_FILENO); return 0; } } diff --git a/src/ge/offline/module.mk b/src/ge/offline/module.mk index c97e7813..a347362a 100644 --- a/src/ge/offline/module.mk +++ b/src/ge/offline/module.mk @@ -42,7 +42,7 @@ LOCAL_SHARED_LIBRARIES := \ libge_compiler \ libruntime_compile \ libparser_common \ - libfmk_tensorflow_parser \ + libfmk_parser \ liberror_manager \ LOCAL_STATIC_LIBRARIES := libgflags diff --git a/src/ge/offline/single_op_parser.cc b/src/ge/offline/single_op_parser.cc index b8947a65..9e209cc8 100644 --- a/src/ge/offline/single_op_parser.cc +++ b/src/ge/offline/single_op_parser.cc @@ -28,6 +28,8 @@ #include "common/ge_inner_error_codes.h" #include "framework/common/util.h" #include "graph/utils/tensor_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/operator_factory_impl.h" using Json = nlohmann::json; using std::map; @@ -43,10 +45,14 @@ constexpr char const *kKeyAttr = "attr"; constexpr char const *kKeyName = "name"; constexpr char const *kKeyType = "type"; constexpr char const *kKeyShape = "shape"; +constexpr char const *kKeyShapeRange = "shape_range"; constexpr char const *kKeyValue = "value"; constexpr char const *kKeyFormat = "format"; constexpr char const *kFileSuffix = ".om"; constexpr int kDumpJsonIndent = 2; +constexpr int kShapeRangePairSize = 2; +constexpr int kShapeRangeLow = 0; +constexpr int kShapeRangeHigh = 1; map kAttrTypeDict = { {"bool", GeAttrValue::VT_BOOL}, @@ -90,6 +96,10 @@ T GetValue(const map &dict, string &key, T default_val) { void from_json(const Json &j, SingleOpTensorDesc &desc) { desc.dims = j.at(kKeyShape).get>(); + auto 
it = j.find(kKeyShapeRange); + if (it != j.end()) { + desc.dim_ranges = j.at(kKeyShapeRange).get>>(); + } string format_str = j.at(kKeyFormat).get(); string type_str = j.at(kKeyType).get(); desc.format = GetValue(kFormatDict, format_str, FORMAT_RESERVED); @@ -265,6 +275,7 @@ Status SingleOpParser::ConvertToBuildParam(int index, const SingleOpDesc &single } GeTensorDesc ge_tensor_desc(GeShape(desc.dims), desc.format, desc.type); ge_tensor_desc.SetOriginFormat(desc.format); + GE_CHK_STATUS_RET_NOLOG(SetShapeRange(desc, ge_tensor_desc)); TensorUtils::SetRealDimCnt(ge_tensor_desc, desc.dims.size()); TensorUtils::SetInputTensor(ge_tensor_desc, true); TensorUtils::SetOutputTensor(ge_tensor_desc, false); @@ -284,6 +295,7 @@ Status SingleOpParser::ConvertToBuildParam(int index, const SingleOpDesc &single GeTensorDesc ge_tensor_desc(GeShape(desc.dims), desc.format, desc.type); ge_tensor_desc.SetOriginFormat(desc.format); + GE_CHK_STATUS_RET_NOLOG(SetShapeRange(desc, ge_tensor_desc)); TensorUtils::SetRealDimCnt(ge_tensor_desc, desc.dims.size()); TensorUtils::SetInputTensor(ge_tensor_desc, false); TensorUtils::SetOutputTensor(ge_tensor_desc, true); @@ -297,8 +309,75 @@ Status SingleOpParser::ConvertToBuildParam(int index, const SingleOpDesc &single file_name << kFileSuffix; build_param.file_name = file_name.str(); - build_param.op_desc.reset(op_desc); + if (VerifyOpInputOutputSizeByIr(*op_desc) != SUCCESS) { + GELOGE(PARAM_INVALID, "Verify op [%s] input or output size failed.", op_desc->GetType().c_str()); + return PARAM_INVALID; + } + return SUCCESS; +} + +Status SingleOpParser::VerifyOpInputOutputSizeByIr(const OpDesc ¤t_op_desc) { + ge::Operator operator_ir = ge::OperatorFactory::CreateOperator("tmp_operator", current_op_desc.GetType()); + if (!operator_ir.IsEmpty()) { + auto opdesc_ir = ge::OpDescUtils::GetOpDescFromOperator(operator_ir); + GE_CHECK_NOTNULL(opdesc_ir); + size_t current_opdesc_inputs_num = current_op_desc.GetInputsSize(); + size_t ir_opdesc_inputs_num = 
opdesc_ir->GetInputsSize(); + if (current_opdesc_inputs_num < ir_opdesc_inputs_num) { + string reason = "is smaller than the ir needed input size " + std::to_string(ir_opdesc_inputs_num); + ErrorManager::GetInstance().ATCReportErrMessage( + "E19014", {"opname", "value", "reason"}, + {current_op_desc.GetName(), "input size " + std::to_string(current_opdesc_inputs_num), reason}); + GELOGE(PARAM_INVALID, "This op [%s] input size %zu is smaller than the ir needed input size %zu", + current_op_desc.GetName().c_str(), current_opdesc_inputs_num, ir_opdesc_inputs_num); + return PARAM_INVALID; + } + size_t current_opdesc_outputs_num = current_op_desc.GetOutputsSize(); + size_t ir_opdesc_outputs_num = opdesc_ir->GetOutputsSize(); + if (current_opdesc_outputs_num < ir_opdesc_outputs_num) { + string reason = "is smaller than the ir needed output size " + std::to_string(ir_opdesc_outputs_num); + ErrorManager::GetInstance().ATCReportErrMessage( + "E19014", {"opname", "value", "reason"}, + {current_op_desc.GetName(), "output size " + std::to_string(current_opdesc_outputs_num), reason}); + GELOGE(PARAM_INVALID, "This op [%s] output size %zu is smaller than the ir needed output size %zu", + current_op_desc.GetName().c_str(), current_opdesc_outputs_num, ir_opdesc_outputs_num); + return PARAM_INVALID; + } + } + return SUCCESS; +} + +Status SingleOpParser::SetShapeRange(const SingleOpTensorDesc &tensor_desc, GeTensorDesc &ge_tensor_desc) { + if (tensor_desc.dim_ranges.empty()) { + return SUCCESS; + } + + std::vector> shape_range; + size_t range_index = 0; + for (auto dim : tensor_desc.dims) { + if (dim >= 0) { + shape_range.emplace_back(dim, dim); + GELOGD("Adding shape range: [%ld, %ld]", dim, dim); + } else { + if (range_index >= tensor_desc.dim_ranges.size()) { + GELOGE(PARAM_INVALID, "The number of shape_range mismatches that of unknown dims."); + return PARAM_INVALID; + } + + auto &range = tensor_desc.dim_ranges[range_index]; + if (range.size() != kShapeRangePairSize) { + 
GELOGE(PARAM_INVALID, "Invalid shape range entry. index = %zu, size = %zu", range_index, range.size()); + return PARAM_INVALID; + } + + shape_range.emplace_back(range[kShapeRangeLow], range[kShapeRangeHigh]); + GELOGD("Adding shape range: [%ld, %ld]", range[kShapeRangeLow], range[kShapeRangeHigh]); + ++range_index; + } + } + + ge_tensor_desc.SetShapeRange(shape_range); return SUCCESS; } diff --git a/src/ge/offline/single_op_parser.h b/src/ge/offline/single_op_parser.h index 13c2e565..4ca75188 100644 --- a/src/ge/offline/single_op_parser.h +++ b/src/ge/offline/single_op_parser.h @@ -31,6 +31,7 @@ namespace ge { struct SingleOpTensorDesc { std::string name; std::vector dims; + std::vector> dim_ranges; ge::Format format = ge::FORMAT_RESERVED; ge::DataType type = ge::DT_UNDEFINED; }; @@ -70,6 +71,8 @@ class SingleOpParser { static bool Validate(const SingleOpDesc &op_desc); static OpDesc *CreateOpDesc(const std::string &op_type); static Status ConvertToBuildParam(int index, const SingleOpDesc &single_op_desc, SingleOpBuildParam &build_param); + static Status VerifyOpInputOutputSizeByIr(const OpDesc ¤t_op_desc); + static Status SetShapeRange(const SingleOpTensorDesc &tensor_desc, GeTensorDesc &ge_tensor_desc); }; } // namespace ge diff --git a/src/ge/opskernel_manager/ops_kernel_manager.cc b/src/ge/opskernel_manager/ops_kernel_manager.cc index d4d2b1ce..0d6f1e07 100644 --- a/src/ge/opskernel_manager/ops_kernel_manager.cc +++ b/src/ge/opskernel_manager/ops_kernel_manager.cc @@ -136,7 +136,7 @@ void OpsKernelManager::GetExternalEnginePath(std::string &extern_engine_path) { std::string path = path_base + so_path; extern_engine_path = (path + "libfe.so" + ":") + (path + "libge_local_engine.so" + ":") + (path + "librts_engine.so" + ":") + (path + "libaicpu_engine.so" + ":") + - (path_base + "libhccl.so"); + (path_base + "libhcom_graph_adaptor.so"); } Status OpsKernelManager::InitPluginOptions(const map &options) { diff --git a/src/ge/session/omg.cc b/src/ge/session/omg.cc 
index 26103063..55075d6a 100644 --- a/src/ge/session/omg.cc +++ b/src/ge/session/omg.cc @@ -22,15 +22,16 @@ #include "common/convert/pb2json.h" #include "common/debug/log.h" #include "common/debug/memory_dumper.h" +#include "common/ge/ge_util.h" +#include "common/helper/model_helper.h" #include "common/model_parser/base.h" +#include "common/model_parser/graph_parser_util.h" #include "common/model_saver.h" #include "common/properties_manager.h" #include "common/string_util.h" #include "common/types.h" #include "common/util.h" #include "common/util/error_manager/error_manager.h" -#include "common/helper/model_helper.h" -#include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" #include "framework/omg/parser/parser_inner_ctx.h" #include "google/protobuf/io/zero_copy_stream_impl.h" @@ -113,6 +114,22 @@ static Status CheckInputShapeNode(const ComputeGraphPtr &graph) { return SUCCESS; } +void AddAttrsForInputNodes(const vector &adjust_fp16_format_vec, const string &fp16_nodes_name, uint32_t index, + OpDescPtr &op_desc) { + if (AttrUtils::SetBool(op_desc, "input_fp16", true) && + AttrUtils::SetStr(op_desc, ATTR_ATC_USER_DEFINE_DATATYPE, TypeUtils::DataTypeToSerialString(DT_FLOAT16))) { + if ((index < adjust_fp16_format_vec.size()) && (adjust_fp16_format_vec[index] == "true")) { + GELOGI("This node [%s] should be set NC1HWC0", fp16_nodes_name.c_str()); + if (!AttrUtils::SetBool(op_desc, "input_set_nc1hwc0", true)) { + GELOGW("This node [%s] set NC1HWC0 failed", fp16_nodes_name.c_str()); + } + if (!AttrUtils::SetStr(op_desc, ATTR_ATC_USER_DEFINE_FORMAT, TypeUtils::FormatToSerialString(FORMAT_NC1HWC0))) { + GELOGW("This node [%s] set NC1HWC0 failed", fp16_nodes_name.c_str()); + } + } + } +} + static Status CheckInputFp16Nodes(const ComputeGraphPtr &graph, const string &input_fp16_nodes, const string &is_input_adjust_hw_layout) { GE_CHECK_NOTNULL(graph); @@ -151,14 +168,7 @@ static Status CheckInputFp16Nodes(const ComputeGraphPtr &graph, const string &in 
input_fp16_nodes_vec[i].c_str()); return PARAM_INVALID; } - if (ge::AttrUtils::SetBool(op_desc, "input_fp16", true)) { - if ((i < adjust_fp16_format_vec.size()) && (adjust_fp16_format_vec[i] == "true")) { - GELOGI("This node [%s] should be set NC1HWC0", input_fp16_nodes_vec[i].c_str()); - if (!ge::AttrUtils::SetBool(op_desc, "input_set_nc1hwc0", true)) { - GELOGW("This node [%s] set NC1HWC0 failed", input_fp16_nodes_vec[i].c_str()); - } - } - } + AddAttrsForInputNodes(adjust_fp16_format_vec, input_fp16_nodes_vec[i], i, op_desc); } return SUCCESS; } @@ -201,30 +211,6 @@ static Status SetWeightCompressNodes(const ComputeGraphPtr &graph, const string return SUCCESS; } -static Status ParseOutputFp16NodesFormat(const string &is_output_fp16) { - if (is_output_fp16.empty()) { - return SUCCESS; - } - - vector &output_formats = domi::GetContext().output_formats; - output_formats.clear(); - vector node_format_vec = StringUtils::Split(is_output_fp16, ','); - for (auto &is_fp16 : node_format_vec) { - StringUtils::Trim(is_fp16); - if (!CheckInputTrueOrFalse(is_fp16, "is_output_adjust_hw_layout")) { - GELOGE(PARAM_INVALID, "Invalid Param, is_output_adjust_hw_layout only support true/false: but is [%s]", - is_output_fp16.c_str()); - return PARAM_INVALID; - } - if (is_fp16 == "false") { - output_formats.push_back(DOMI_TENSOR_ND); - } else if (is_fp16 == "true") { - output_formats.push_back(domi::DOMI_TENSOR_NC1HWC0); - } - } - return SUCCESS; -} - void FindParserSo(const string &path, vector &file_list, string &caffe_parser_path) { // path, Change to absolute path string real_path = RealPath(path.c_str()); @@ -306,198 +292,6 @@ Status SetOutFormatAndDataTypeAttr(ge::OpDescPtr op_desc, const ge::Format forma return domi::SUCCESS; } -bool CheckDigitStr(std::string &str) { - for (char c : str) { - if (!isdigit(c)) { - GELOGE(domi::FAILED, "value[%s] is not positive integer", str.c_str()); - return false; - } - } - return true; -} - -Status StringToInt(std::string &str, int32_t 
&value) { - try { - if (!CheckDigitStr(str)) { - GELOGE(PARAM_INVALID, "Invalid of digit string: %s ", str.c_str()); - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {"--output_type", str, "is not positive integer"}); - return PARAM_INVALID; - } - value = stoi(str); - } catch (std::invalid_argument &) { - GELOGE(PARAM_INVALID, "Invalid of digit string: %s, catch invalid_argument.", str.c_str()); - ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"output_type", str}); - return PARAM_INVALID; - } catch (std::out_of_range &) { - GELOGE(PARAM_INVALID, "Invalid of digit string: %s, catch out_of_range.", str.c_str()); - ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, {"output_type", str}); - return PARAM_INVALID; - } - return SUCCESS; -} - -Status VerifyOutputTypeAndOutNodes(std::vector &out_type_vec) { - std::vector> user_out_nodes = domi::GetContext().user_out_nodes; - std::set out_nodes_info; - for (uint32_t i = 0; i < user_out_nodes.size(); ++i) { - // out_nodes set should include output_type and output_format - std::string tmp = user_out_nodes[i].first + ":" + to_string(user_out_nodes[i].second); - out_nodes_info.emplace(tmp); - } - for (uint32_t i = 0; i < out_type_vec.size(); ++i) { - if (out_nodes_info.find(out_type_vec[i]) == out_nodes_info.end()) { - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {"--output_type", out_type_vec[i], kOutputTypeError}); - GELOGE(domi::FAILED, "Invalid value for --output_type[%s], %s.", out_type_vec[i].c_str(), kOutputTypeError); - return domi::FAILED; - } - } - return domi::SUCCESS; -} - -Status ParseOutputType(const std::string &output_type, std::map> &out_type_index_map, - std::map> &out_type_dt_map) { - if (output_type.find(':') == std::string::npos) { - GELOGI("output_type is not multiple nodes, means all out nodes"); - auto it = 
output_type_str_to_datatype.find(output_type); - if (it == output_type_str_to_datatype.end()) { - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {"--output_type", output_type, kOutputTypeSupport}); - GELOGE(PARAM_INVALID, "Invalid value for --output_type[%s], %s.", output_type.c_str(), kOutputTypeSupport); - return domi::FAILED; - } - return domi::SUCCESS; - } - std::vector out_type_vec; - vector nodes_v = StringUtils::Split(output_type, ';'); - for (const string &node : nodes_v) { - vector node_index_type_v = StringUtils::Split(node, ':'); - if (node_index_type_v.size() != 3) { // The size must be 3. - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {"--output_type", node, kOutputTypeSample}); - GELOGE(PARAM_INVALID, "Invalid value for --output_type[%s], %s.", node.c_str(), kOutputTypeSample); - return domi::FAILED; - } - ge::DataType tmp_dt; - std::string node_name = StringUtils::Trim(node_index_type_v[0]); - std::string index_str = StringUtils::Trim(node_index_type_v[1]); - int32_t index; - if (StringToInt(index_str, index) != SUCCESS) { - GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s.", index_str.c_str()); - return domi::FAILED; - } - std::string dt_value = StringUtils::Trim(node_index_type_v[2]); - auto it = output_type_str_to_datatype.find(dt_value); - if (it == output_type_str_to_datatype.end()) { - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {"--output_type", dt_value, kOutputTypeSupport}); - GELOGE(ge::PARAM_INVALID, "Invalid value for --output_type[%s], %s.", dt_value.c_str(), kOutputTypeSupport); - return domi::FAILED; - } else { - tmp_dt = it->second; - } - out_type_vec.push_back(node_name + ":" + index_str); - auto it_index = out_type_index_map.find(node_name); - if (it_index == out_type_index_map.end()) { - vector tmp_vec; - tmp_vec.push_back(index); - 
out_type_index_map.emplace(node_name, tmp_vec); - } else { - it_index->second.push_back(index); - } - - auto it_dt = out_type_dt_map.find(node_name); - if (it_dt == out_type_dt_map.end()) { - vector tmp_vec; - tmp_vec.push_back(tmp_dt); - out_type_dt_map.emplace(node_name, tmp_vec); - } else { - it_dt->second.push_back(tmp_dt); - } - } - return VerifyOutputTypeAndOutNodes(out_type_vec); -} - -Status CheckOutNode(ge::OpDescPtr op_desc, int32_t index) { - int32_t out_size = op_desc->GetOutputsSize(); - if (index < 0 || index >= out_size) { - GELOGE(domi::FAILED, - "out_node [%s] output index:%d must be smaller " - "than node output size:%d and can not be negative!", - op_desc->GetName().c_str(), index, out_size); - std::string fail_reason = "output index:" + to_string(index) + - " must be smaller than output size:" + to_string(out_size) + " and can not be negative!"; - ErrorManager::GetInstance().ATCReportErrMessage("E10003", {"parameter", "value", "reason"}, - {"out_nodes", op_desc->GetName(), fail_reason}); - return domi::FAILED; - } - return domi::SUCCESS; -} - -Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output) { - ge::ComputeGraphPtr compute_graph = ge::GraphUtils::GetComputeGraph(graph); - GE_CHECK_NOTNULL(compute_graph); - - std::vector> user_out_nodes = domi::GetContext().user_out_nodes; - std::vector output_formats = domi::GetContext().output_formats; - std::vector> output_nodes_info; - std::vector output_nodes_name; - std::map> out_type_index_map; - std::map> out_type_dt_map; - if (!output_type.empty()) { - if (ParseOutputType(output_type, out_type_index_map, out_type_dt_map) != SUCCESS) { - GELOGE(domi::FAILED, "Parse output_type failed."); - return domi::FAILED; - } - } - - // User declared outputs - for (uint32_t i = 0; i < user_out_nodes.size(); ++i) { - ge::NodePtr out_node = compute_graph->FindNode(user_out_nodes[i].first); - if (out_node == nullptr) { - GELOGE(domi::FAILED, "Can not find src node 
(%s) in graph.", user_out_nodes[i].first.c_str()); - return domi::FAILED; - } - auto op_desc = out_node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - if (CheckOutNode(op_desc, user_out_nodes[i].second) != SUCCESS) { - GELOGE(domi::FAILED, "Check out node (%s) fail.", user_out_nodes[i].first.c_str()); - return domi::FAILED; - } - if (i < output_formats.size()) { - if (output_formats[i] == domi::DOMI_TENSOR_NC1HWC0) { - GELOGI("The output node [%s] should be set NC1HWC0", user_out_nodes[i].first.c_str()); - if (!ge::AttrUtils::SetBool(op_desc, "output_set_fp16_nc1hwc0", true)) { - GELOGW("The output node [%s] set NC1HWC0 failed", user_out_nodes[i].first.c_str()); - } - } - } - auto it_index = out_type_index_map.find(user_out_nodes[i].first); - auto it_dt = out_type_dt_map.find(user_out_nodes[i].first); - if ((it_index != out_type_index_map.end()) && (it_dt != out_type_dt_map.end())) { - GELOGI("The output node [%s] need to be set output_type", user_out_nodes[i].first.c_str()); - (void)ge::AttrUtils::SetListDataType(op_desc, "_output_dt_list", it_dt->second); - (void)ge::AttrUtils::SetListInt(op_desc, "_output_dt_index", it_index->second); - } - output_nodes_info.push_back(std::make_pair(out_node, user_out_nodes[i].second)); - output_nodes_name.push_back(out_node->GetName() + ":" + std::to_string(user_out_nodes[i].second)); - } - // default output node (leaf) - if (user_out_nodes.empty()) { - for (ge::NodePtr node : compute_graph->GetDirectNode()) { - if (!node->GetInDataNodes().empty() && node->GetOutDataNodes().empty()) { - Status ret = GetOutputLeaf(node, output_nodes_info); - GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "find leaf fail."); - } - } - } - GetOutputNodesNameAndIndex(output_nodes_info, output_nodes_name); - compute_graph->SetGraphOutNodesInfo(output_nodes_info); - domi::GetContext().net_out_nodes = output_nodes_name; - return domi::SUCCESS; -} - void GetOutputNodesNameAndIndex(std::vector> &output_nodes_info, std::vector &output_nodes_name) { 
output_nodes_name.clear(); @@ -523,32 +317,6 @@ void GetOutputNodesNameAndIndex(std::vector> &ou } } -Status GetOutputLeaf(NodePtr node, std::vector> &output_nodes_info) { - ge::OpDescPtr tmpDescPtr = node->GetOpDesc(); - if (tmpDescPtr == nullptr) { - GELOGE(domi::FAILED, "Get outnode op desc fail."); - return domi::FAILED; - } - size_t size = tmpDescPtr->GetOutputsSize(); - if (node->GetType() != NETOUTPUT) { - for (size_t index = 0; index < size; ++index) { - output_nodes_info.push_back(std::make_pair(node, index)); - } - } else { - const auto in_anchors = node->GetAllInDataAnchors(); - for (auto in_anchor : in_anchors) { - auto out_anchor = in_anchor->GetPeerOutAnchor(); - if (out_anchor == nullptr) { - GELOGE(domi::FAILED, "Get leaf node op desc fail."); - return domi::FAILED; - } - auto out_node = out_anchor->GetOwnerNode(); - output_nodes_info.push_back(std::make_pair(out_node, out_anchor->GetIdx())); - } - } - return SUCCESS; -} - /// /// @ingroup domi_common /// @brief Initialize omgcontext based on command line input @@ -592,58 +360,6 @@ Status InitDomiOmgContext(const string &input_shape, const string &input_format, return SUCCESS; } -Status ParseOutNodes(const string &out_nodes) { - try { - // parse output node - if (!out_nodes.empty()) { - domi::GetContext().out_nodes_map.clear(); - domi::GetContext().user_out_nodes.clear(); - - vector nodes_v = StringUtils::Split(out_nodes, ';'); - for (const string &node : nodes_v) { - vector key_value_v = StringUtils::Split(node, ':'); - if (key_value_v.size() != 2) { // The size must be 2. 
- ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, - {"--out_nodes", node, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""}); - GELOGE(PARAM_INVALID, - "The input format of --out_nodes is invalid, the correct format is " - "\"node_name1:0;node_name1:1;node_name2:0\", while the actual input is %s.", - node.c_str()); - return PARAM_INVALID; - } - auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]); - // stoi: The method may throw an exception: invalid_argument/out_of_range - if (!CheckDigitStr(key_value_v[1])) { - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {"--out_nodes", out_nodes, "is not positive integer"}); - GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s", out_nodes.c_str()); - return PARAM_INVALID; - } - int32_t index = stoi(StringUtils::Trim(key_value_v[1])); - if (iter != domi::GetContext().out_nodes_map.end()) { - iter->second.emplace_back(index); - } else { - std::vector index_v; - index_v.emplace_back(index); - domi::GetContext().out_nodes_map.emplace(key_value_v[0], index_v); - } - domi::GetContext().user_out_nodes.push_back(std::make_pair(key_value_v[0], index)); - } - } - } catch (std::invalid_argument &) { - GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", out_nodes.c_str()); - ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"out_nodes", out_nodes}); - return PARAM_INVALID; - } catch (std::out_of_range &) { - GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", out_nodes.c_str()); - ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, {"out_nodes", out_nodes}); - return PARAM_INVALID; - } - - return SUCCESS; -} - /// @ingroup domi_common /// @brief Judge whether the op_Name_Map parameter matches the network /// @param [in] graph Input network graph @@ -712,7 +428,7 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph 
&graph, const std::map &input_desc, const std::vector &inputs, + std::vector &output_desc, std::vector &outputs) const { + if (inputs.size() != input_desc.size()) { + GELOGE(PARAM_INVALID, "Input number mismatches input desc number. Input num = %zu, input desc num = %zu", + inputs.size(), input_desc.size()); + return PARAM_INVALID; + } + + if (outputs.size() != output_desc.size()) { + GELOGE(PARAM_INVALID, "Output number mismatches output desc number. Output num = %zu, output desc num = %zu", + outputs.size(), output_desc.size()); + return PARAM_INVALID; + } + + if (input_desc.size() != num_inputs_) { + GELOGE(PARAM_INVALID, "Input number mismatches. expect %zu, but given %zu", num_inputs_, input_desc.size()); + return PARAM_INVALID; + } + + if (output_desc.size() != num_outputs_) { + GELOGE(PARAM_INVALID, "Output number mismatches. expect %zu, but given %zu", num_outputs_, output_desc.size()); + return PARAM_INVALID; + } + + return SUCCESS; +} + +Status DynamicSingleOp::AllocateWorkspaces(const std::vector &workspace_sizes, + std::vector &workspaces) { + static const std::string kPurpose("malloc workspace memory for dynamic op."); + if (workspace_sizes.empty()) { + GELOGD("No need to allocate workspace."); + return SUCCESS; + } + int64_t total_size = 0; + std::vector ws_offsets; + for (auto ws_size : workspace_sizes) { + // alignment and padding should be done in OpParaCalculate + GE_CHK_STATUS_RET_NOLOG(CheckInt64AddOverflow(total_size, ws_size)); + ws_offsets.emplace_back(total_size); + total_size += ws_size; + } + + GELOGD("Total workspace size is %ld", total_size); + StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_); + GE_CHECK_NOTNULL(stream_resource); + auto ws_base = stream_resource->MallocMemory(kPurpose, static_cast(total_size)); + if (ws_base == nullptr) { + GELOGE(MEMALLOC_FAILED, "Failed to allocate memory of size: %ld", total_size); + return MEMALLOC_FAILED; + } + GELOGD("Done allocating workspace 
memory successfully."); + + for (auto ws_offset : ws_offsets) { + workspaces.emplace_back(ws_base + ws_offset); + } + + return SUCCESS; +} + +Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, const vector &input_buffers, + vector &output_desc, vector &output_buffers) { + GE_CHECK_NOTNULL(op_task_); + GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); + GE_CHK_STATUS_RET_NOLOG(op_task_->UpdateRunInfo(input_desc, output_desc)); + std::vector workspace_buffers; + GE_CHK_STATUS_RET_NOLOG(AllocateWorkspaces(op_task_->GetWorkspaceSizes(), workspace_buffers)); + std::vector inputs; + std::vector outputs; + for (auto &buffer : input_buffers) { + inputs.emplace_back(buffer.data); + } + for (auto &buffer : output_buffers) { + outputs.emplace_back(buffer.data); + } + return op_task_->LaunchKernel(inputs, outputs, workspace_buffers, stream_); +} } // namespace ge diff --git a/src/ge/single_op/single_op.h b/src/ge/single_op/single_op.h index 08782b3b..d86c79ee 100644 --- a/src/ge/single_op/single_op.h +++ b/src/ge/single_op/single_op.h @@ -53,5 +53,26 @@ class SingleOp { std::vector> arg_table_; bool use_physical_addr_ = false; }; + +class DynamicSingleOp { + public: + DynamicSingleOp(uintptr_t resource_id, rtStream_t stream); + ~DynamicSingleOp() = default; + Status ExecuteAsync(const vector &input_desc, const std::vector &inputs, + std::vector &output_desc, std::vector &outputs); + + private: + friend class SingleOpModel; + Status ValidateParams(const vector &input_desc, const std::vector &inputs, + std::vector &output_desc, std::vector &outputs) const; + + Status AllocateWorkspaces(const std::vector &workspace_sizes, std::vector &workspaces); + + std::unique_ptr op_task_; + uintptr_t resource_id_ = 0; + rtStream_t stream_ = nullptr; + size_t num_inputs_ = 0; + size_t num_outputs_ = 0; +}; } // namespace ge #endif // GE_SINGLE_OP_SINGLE_OP_H_ diff --git a/src/ge/single_op/single_op_manager.cc 
b/src/ge/single_op/single_op_manager.cc index 79f3f044..aa6f6d2b 100644 --- a/src/ge/single_op/single_op_manager.cc +++ b/src/ge/single_op/single_op_manager.cc @@ -19,9 +19,6 @@ #include #include -#include "runtime/dev.h" -#include "framework/common/debug/ge_log.h" - namespace ge { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOpManager::~SingleOpManager() { for (auto &it : stream_resources_) { @@ -34,31 +31,15 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOpManager::GetOpFr const ModelData &model_data, void *stream, SingleOp **single_op) { + GELOGI("GetOpFromModel in. model name = %s", model_name.c_str()); if (single_op == nullptr) { GELOGE(PARAM_INVALID, "single op is null"); return PARAM_INVALID; } - uintptr_t resource_id; - // runtime uses NULL to denote a default stream for each device - if (stream == nullptr) { - // use device id as resource key instead - int32_t dev_id = 0; - auto rt_err = rtGetDevice(&dev_id); - if (rt_err != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Get current device id failed. ret = %d", static_cast(rt_err)); - return RT_FAILED; - } - - GELOGI("GetOpFromModel with default stream. device id = %d", dev_id); - resource_id = static_cast(dev_id); - } else { - resource_id = reinterpret_cast(stream); - } - - GELOGI("GetOpFromModel in. 
model name = %s, resource id = 0x%lx", model_name.c_str(), - static_cast(resource_id)); - StreamResource *res = GetResource(resource_id); + uintptr_t resource_id = 0; + GE_CHK_STATUS_RET(GetResourceId(stream, resource_id)); + StreamResource *res = GetResource(resource_id, stream); if (res == nullptr) { GELOGE(MEMALLOC_FAILED, "GetResource failed"); return MEMALLOC_FAILED; @@ -78,26 +59,19 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOpManager::GetOpFr return ret; } - auto *new_op = new (std::nothrow) SingleOp(); + auto new_op = std::unique_ptr(new (std::nothrow) SingleOp()); if (new_op == nullptr) { GELOGE(MEMALLOC_FAILED, "new SingleOp failed"); return MEMALLOC_FAILED; } GELOGI("To build operator: %s", model_name.c_str()); - ret = model.BuildOp(*res, *new_op); - if (ret != SUCCESS) { - GELOGE(ret, "Build op failed. op = %s, resource id = 0x%lx, ret = %u", model_name.c_str(), - static_cast(resource_id), ret); - delete new_op; - new_op = nullptr; - return ret; - } + GE_CHK_STATUS_RET(model.BuildOp(*res, *new_op), "Build op failed. 
op = %s, ret = %u", model_name.c_str(), ret); // stream is nullable new_op->SetStream(stream); - res->CacheOperator(model_data.model_data, new_op); - *single_op = new_op; + *single_op = new_op.get(); + res->CacheOperator(model_data.model_data, std::move(new_op)); return SUCCESS; } @@ -115,13 +89,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOpManager::Release return SUCCESS; } -StreamResource *SingleOpManager::GetResource(uintptr_t resource_id) { +StreamResource *SingleOpManager::GetResource(uintptr_t resource_id, rtStream_t stream) { std::lock_guard lock(mutex_); auto it = stream_resources_.find(resource_id); StreamResource *res = nullptr; if (it == stream_resources_.end()) { res = new (std::nothrow) StreamResource(); if (res != nullptr) { + res->SetStream(stream); stream_resources_.emplace(resource_id, res); } } else { @@ -140,4 +115,74 @@ StreamResource *SingleOpManager::TryGetResource(uintptr_t resource_id) { return it->second; } + +Status SingleOpManager::GetDynamicOpFromModel(const string &model_name, const ModelData &model_data, void *stream, + DynamicSingleOp **single_op) { + GE_CHECK_NOTNULL(single_op); + uintptr_t resource_id = 0; + GE_CHK_STATUS_RET(GetResourceId(stream, resource_id)); + StreamResource *res = GetResource(resource_id, stream); + if (res == nullptr) { + GELOGE(MEMALLOC_FAILED, "GetResource failed"); + return MEMALLOC_FAILED; + } + + DynamicSingleOp *op = res->GetDynamicOperator(model_data.model_data); + if (op != nullptr) { + GELOGD("Got operator from stream cache"); + *single_op = op; + return SUCCESS; + } + + if (!tiling_func_registered_) { + RegisterTilingFunc(); + } + + SingleOpModel model(model_name, model_data.model_data, model_data.model_len); + auto ret = model.Init(); + if (ret != SUCCESS) { + GELOGE(ret, "Init model failed. 
model = %s, ret = %u", model_name.c_str(), ret); + return ret; + } + + auto new_op = std::unique_ptr(new (std::nothrow) DynamicSingleOp(resource_id, stream)); + GE_CHECK_NOTNULL(new_op); + + GELOGI("To build operator: %s", model_name.c_str()); + GE_CHK_STATUS_RET(model.BuildDynamicOp(*new_op), "Build op failed. op = %s, ret = %u", model_name.c_str(), ret); + *single_op = new_op.get(); + res->CacheDynamicOperator(model_data.model_data, std::move(new_op)); + return SUCCESS; +} + +void SingleOpManager::RegisterTilingFunc() { + std::lock_guard lk(mutex_); + if (tiling_func_registered_) { + return; + } + + op_tiling_manager_.LoadSo(); + tiling_func_registered_ = true; +} + +Status SingleOpManager::GetResourceId(rtStream_t stream, uintptr_t &resource_id) { + // runtime uses NULL to denote a default stream for each device + if (stream == nullptr) { + // get current context + rtContext_t rt_cur_ctx = nullptr; + auto rt_err = rtCtxGetCurrent(&rt_cur_ctx); + if (rt_err != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast(rt_err)); + return RT_FAILED; + } + // use current context as resource key instead + GELOGI("use context as resource key instead when default stream"); + resource_id = reinterpret_cast(rt_cur_ctx); + } else { + GELOGI("use stream as resource key instead when create stream"); + resource_id = reinterpret_cast(stream); + } + + return SUCCESS; +} } // namespace ge diff --git a/src/ge/single_op/single_op_manager.h b/src/ge/single_op/single_op_manager.h index 15d32316..09ae0e4e 100644 --- a/src/ge/single_op/single_op_manager.h +++ b/src/ge/single_op/single_op_manager.h @@ -20,7 +20,7 @@ #include #include #include - +#include "common/ge/op_tiling_manager.h" #include "single_op/single_op_model.h" #include "single_op/stream_resource.h" @@ -34,16 +34,27 @@ class SingleOpManager { return instance; } - Status GetOpFromModel(const std::string &key, const ge::ModelData &model_data, void *stream, SingleOp **single_op); + 
Status GetOpFromModel(const std::string &model_name, const ge::ModelData &model_data, void *stream, + SingleOp **single_op); + + Status GetDynamicOpFromModel(const std::string &model_name, const ge::ModelData &model_data, void *stream, + DynamicSingleOp **dynamic_single_op); + + StreamResource *GetResource(uintptr_t resource_id, rtStream_t stream); Status ReleaseResource(void *stream); + void RegisterTilingFunc(); + private: - StreamResource *GetResource(uintptr_t resource_id); + static Status GetResourceId(rtStream_t stream, uintptr_t &resource_id); + StreamResource *TryGetResource(uintptr_t resource_id); std::mutex mutex_; + bool tiling_func_registered_ = false; std::unordered_map stream_resources_; + OpTilingManager op_tiling_manager_; }; } // namespace ge diff --git a/src/ge/single_op/single_op_model.cc b/src/ge/single_op/single_op_model.cc index b72a41fc..27958e7c 100644 --- a/src/ge/single_op/single_op_model.cc +++ b/src/ge/single_op/single_op_model.cc @@ -43,12 +43,8 @@ SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_da : model_name_(model_name), ori_model_data_(model_data), ori_model_size_(model_size) {} Status SingleOpModel::Init() { - auto ret = InitModel(); - if (ret != SUCCESS) { - return ret; - } - - return ParseInputsAndOutputs(); + GE_CHK_STATUS_RET_NOLOG(InitModel()); + return LoadAllNodes(); } Status SingleOpModel::InitModel() { @@ -150,7 +146,7 @@ void SingleOpModel::ParseOutputNode(const OpDescPtr &op_desc) { } } -Status SingleOpModel::ParseInputsAndOutputs() { +Status SingleOpModel::LoadAllNodes() { auto ge_model = model_helper_.GetGeModel(); GE_CHECK_NOTNULL(ge_model); Graph graph = ge_model->GetGraph(); @@ -168,19 +164,18 @@ Status SingleOpModel::ParseInputsAndOutputs() { auto node = nodes.at(i); auto op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); - op_list_[i] = op_desc; + op_list_[i] = node; auto op_type = op_desc->GetType(); GELOGI("[%s] node[%zu] = %s, type = %s", model_name_.c_str(), i, 
node->GetName().c_str(), op_type.c_str()); if (op_type == DATA_TYPE || op_type == AIPP_DATA_TYPE) { - auto ret = ParseInputNode(op_desc); - if (ret != SUCCESS) { - return ret; - } + data_ops_.emplace_back(op_desc); + continue; } if (op_type == NETOUTPUT) { - ParseOutputNode(op_desc); + netoutput_op_ = op_desc; + continue; } ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(op_desc); @@ -189,6 +184,14 @@ Status SingleOpModel::ParseInputsAndOutputs() { return SUCCESS; } +Status SingleOpModel::ParseInputsAndOutputs() { + for (auto &op_desc : data_ops_) { + GE_CHK_STATUS_RET_NOLOG(ParseInputNode(op_desc)); + } + ParseOutputNode(netoutput_op_); + return SUCCESS; +} + Status SingleOpModel::SetInputsAndOutputs(SingleOp &single_op) { // for lhisi const char *use_physical_address = std::getenv("GE_USE_PHYSICAL_ADDRESS"); @@ -230,12 +233,15 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { auto kernel_type = static_cast(context.kernel_type()); if (kernel_type == cce::ccKernelType::TE) { GELOGD("Building TBE task"); - OpTask *task = nullptr; - auto ret = BuildKernelTask(task_def.kernel(), single_op, &task); + TbeOpTask *tbe_task = nullptr; + auto ret = BuildKernelTask(task_def.kernel(), &tbe_task); if (ret != SUCCESS) { return ret; } - single_op.tasks_.emplace_back(task); + + single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); + ParseArgTable(tbe_task, single_op); + single_op.tasks_.emplace_back(tbe_task); } else if (kernel_type == cce::ccKernelType::AI_CPU) { GELOGD("Building AICPU_CC task"); OpTask *task = nullptr; @@ -284,7 +290,7 @@ void SingleOpModel::ParseArgTable(TbeOpTask *task, SingleOp &op) { } } -Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, SingleOp &single_op, OpTask **task) { +Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task) { GE_CHECK_NOTNULL(task); const auto &context = kernel_def.context(); auto iter = 
op_list_.find(context.op_index()); @@ -307,9 +313,6 @@ Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, SingleO return ret; } - single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); - ParseArgTable(tbe_task, single_op); - *task = tbe_task; return SUCCESS; } @@ -326,7 +329,7 @@ Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, Sin GELOGE(MEMALLOC_FAILED, "create aicpu_TF op task failed"); return MEMALLOC_FAILED; } - auto builder = AiCpuTaskBuilder(iter->second, kernel_def); + auto builder = AiCpuTaskBuilder(iter->second->GetOpDesc(), kernel_def); auto ret = builder.BuildTask(*aicpu_task, model_params_); if (ret != SUCCESS) { GELOGE(ret, "build aicpu_TF op task failed"); @@ -356,15 +359,44 @@ Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTa } Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { - auto ret = InitModelMem(resource); - if (ret != SUCCESS) { - return ret; - } + GE_CHK_STATUS_RET_NOLOG(ParseInputsAndOutputs()); + GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); + GE_CHK_STATUS_RET_NOLOG(SetInputsAndOutputs(single_op)); + return BuildTaskList(single_op); +} - ret = SetInputsAndOutputs(single_op); - if (ret != SUCCESS) { - return ret; +Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { + auto ge_model = model_helper_.GetGeModel(); + GE_CHECK_NOTNULL(ge_model); + + auto tasks = ge_model->GetModelTaskDefPtr()->task(); + for (int i = 0; i < tasks.size(); ++i) { + const TaskDef &task_def = tasks[i]; + GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(), + task_def.DebugString().c_str()); + auto task_type = static_cast(task_def.type()); + if (task_type == RT_MODEL_TASK_KERNEL) { + if (single_op.op_task_ != nullptr) { + GELOGE(UNSUPPORTED, "Do not support dynamic op with multiple tasks."); + return UNSUPPORTED; + } + + TbeOpTask *task = nullptr; + 
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &task)); + single_op.op_task_.reset(task); + } else { + // skip + GELOGD("Skip task type: %d", static_cast(task_type)); + } } - return BuildTaskList(single_op); + + return SUCCESS; +} + +Status SingleOpModel::BuildDynamicOp(DynamicSingleOp &single_op) { + single_op.num_inputs_ = data_ops_.size(); + single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); + ParseOpModelParams(model_helper_, model_params_); + return BuildTaskListForDynamicOp(single_op); } } // namespace ge diff --git a/src/ge/single_op/single_op_model.h b/src/ge/single_op/single_op_model.h index 3b8c2616..caa958e5 100644 --- a/src/ge/single_op/single_op_model.h +++ b/src/ge/single_op/single_op_model.h @@ -50,9 +50,11 @@ class SingleOpModel { Status Init(); Status BuildOp(StreamResource &resource, SingleOp &single_op); + Status BuildDynamicOp(DynamicSingleOp &single_op); private: Status InitModel(); + Status LoadAllNodes(); Status ParseInputsAndOutputs(); Status SetInputsAndOutputs(SingleOp &single_op); @@ -62,7 +64,8 @@ class SingleOpModel { void ParseOutputNode(const OpDescPtr &op_desc); Status BuildTaskList(SingleOp &single_op); - Status BuildKernelTask(const domi::KernelDef &kernel_def, SingleOp &single_op, OpTask **task); + Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); + Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task); Status BuildKernelExTask(const domi::KernelExDef &kernel_def, SingleOp &single_op, OpTask **task); Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task); @@ -75,13 +78,15 @@ class SingleOpModel { ModelHelper model_helper_; - map op_list_; + map op_list_; SingleOpModelParam model_params_; std::vector input_offset_list_; std::vector input_sizes_; std::vector output_offset_list_; std::vector output_sizes_; + std::vector data_ops_; + OpDescPtr netoutput_op_; }; } // namespace ge diff --git a/src/ge/single_op/stream_resource.cc 
b/src/ge/single_op/stream_resource.cc index e48afb96..703b22b2 100644 --- a/src/ge/single_op/stream_resource.cc +++ b/src/ge/single_op/stream_resource.cc @@ -23,12 +23,6 @@ namespace ge { StreamResource::~StreamResource() { - for (auto it : op_map_) { - // it's safe to delete a nullptr - delete it.second; - it.second = nullptr; - } - for (auto mem : memory_list_) { if (mem != nullptr) { auto rt_ret = rtFree(mem); @@ -44,7 +38,13 @@ StreamResource::~StreamResource() { } } -void StreamResource::CacheOperator(const void *key, SingleOp *single_op) { op_map_[key] = single_op; } +void StreamResource::CacheOperator(const void *key, std::unique_ptr &&single_op) { + op_map_[key] = std::move(single_op); +} + +void StreamResource::CacheDynamicOperator(const void *key, std::unique_ptr &&single_op) { + dynamic_op_map_[key] = std::move(single_op); +} SingleOp *StreamResource::GetOperator(const void *key) { auto it = op_map_.find(key); @@ -52,9 +52,20 @@ SingleOp *StreamResource::GetOperator(const void *key) { return nullptr; } - return it->second; + return it->second.get(); } +DynamicSingleOp *StreamResource::GetDynamicOperator(const void *key) { + auto it = dynamic_op_map_.find(key); + if (it == dynamic_op_map_.end()) { + return nullptr; + } + + return it->second.get(); +} + +void StreamResource::SetStream(rtStream_t stream) { stream_ = stream; } + uint8_t *StreamResource::DoMallocMemory(const std::string &purpose, size_t size, size_t &max_allocated, std::vector &allocated) { if (size <= max_allocated && !allocated.empty()) { @@ -62,6 +73,20 @@ uint8_t *StreamResource::DoMallocMemory(const std::string &purpose, size_t size, return allocated.back(); } + if (!allocated.empty()) { + GELOGD("Expand workspace memory size from %zu to %zu", max_allocated, size); + auto ret = rtStreamSynchronize(stream_); + if (ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "rtStreamSynchronize failed, ret = %d", ret); + return nullptr; + } + + auto addr = allocated.back(); + allocated.pop_back(); + 
(void)rtFree(addr); + max_allocated = 0; + } + uint8_t *buffer = nullptr; auto ret = rtMalloc(reinterpret_cast(&buffer), size, RT_MEMORY_HBM); if (ret != RT_ERROR_NONE) { diff --git a/src/ge/single_op/stream_resource.h b/src/ge/single_op/stream_resource.h index fc114c08..6f26c497 100644 --- a/src/ge/single_op/stream_resource.h +++ b/src/ge/single_op/stream_resource.h @@ -37,22 +37,27 @@ class StreamResource { StreamResource &operator=(const StreamResource &) = delete; StreamResource &operator=(StreamResource &&) = delete; - void CacheOperator(const void *key, SingleOp *single_op); + void CacheOperator(const void *key, std::unique_ptr &&single_op); + void CacheDynamicOperator(const void *key, std::unique_ptr &&single_op); + void SetStream(rtStream_t stream); SingleOp *GetOperator(const void *key); + DynamicSingleOp *GetDynamicOperator(const void *key); uint8_t *MallocMemory(const std::string &purpose, size_t size); uint8_t *MallocWeight(const std::string &purpose, size_t size); private: - static uint8_t *DoMallocMemory(const std::string &purpose, size_t size, size_t &max_allocated, - std::vector &allocated); + uint8_t *DoMallocMemory(const std::string &purpose, size_t size, size_t &max_allocated, + std::vector &allocated); size_t max_memory_size_ = 0; size_t max_weight_size_ = 0; std::vector memory_list_; std::vector weight_list_; - std::unordered_map op_map_; + std::unordered_map> op_map_; + std::unordered_map> dynamic_op_map_; + rtStream_t stream_ = nullptr; }; } // namespace ge diff --git a/src/ge/single_op/task/op_task.cc b/src/ge/single_op/task/op_task.cc index 19e8b6a4..ddc4992c 100644 --- a/src/ge/single_op/task/op_task.cc +++ b/src/ge/single_op/task/op_task.cc @@ -18,9 +18,11 @@ #include #include +#include #include "runtime/rt.h" -#include "framework/common/debug/ge_log.h" +#include "register/op_tiling.h" +#include "framework/common/debug/log.h" namespace ge { namespace { @@ -33,25 +35,29 @@ void TbeOpTask::SetStubFunc(const std::string &name, const void 
*stub_func) { this->stub_func_ = stub_func; } -void TbeOpTask::SetKernelArgs(void *args, size_t arg_size, uint32_t block_dim) { - args_ = args; +void TbeOpTask::SetKernelArgs(std::unique_ptr &&args, size_t arg_size, uint32_t block_dim) { + args_ = std::move(args); arg_size_ = arg_size; block_dim_ = block_dim; } void TbeOpTask::SetSmDesc(void *sm_desc) { sm_desc_ = sm_desc; } -TbeOpTask::~TbeOpTask() { - if (args_ != nullptr) { - (void)rtFreeHost(args_); - } +const vector &OpTask::GetWorkspaceSizes() const { return workspace_sizes_; } + +void OpTask::SetWorkspaceSizes(const vector &workspace_sizes) { workspace_sizes_ = workspace_sizes; } +TbeOpTask::~TbeOpTask() { if (sm_desc_ != nullptr) { (void)rtMemFreeManaged(sm_desc_); } + + if (tiling_buffer_ != nullptr) { + (void)rtFree(tiling_buffer_); + } } -const void *TbeOpTask::GetArgs() const { return args_; } +const void *TbeOpTask::GetArgs() const { return args_.get(); } size_t TbeOpTask::GetArgSize() const { return arg_size_; } @@ -60,13 +66,13 @@ const std::string &TbeOpTask::GetStubName() const { return stub_name_; } Status TbeOpTask::LaunchKernel(rtStream_t stream) { GELOGD("To invoke rtKernelLaunch. 
task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); auto *sm_desc = reinterpret_cast(sm_desc_); - auto ret = rtKernelLaunch(stub_func_, block_dim_, args_, static_cast(arg_size_), sm_desc, stream); + auto ret = rtKernelLaunch(stub_func_, block_dim_, args_.get(), static_cast(arg_size_), sm_desc, stream); int retry_times = 0; while (ret != RT_ERROR_NONE && retry_times < kLaunchRetryTimes) { retry_times++; GELOGW("Retry after %d ms, retry_times: %d", kSleepTime, retry_times); std::this_thread::sleep_for(std::chrono::milliseconds(kSleepTime)); - ret = rtKernelLaunch(stub_func_, block_dim_, args_, arg_size_, sm_desc, stream); + ret = rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, sm_desc, stream); } if (ret != RT_ERROR_NONE) { @@ -78,6 +84,103 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) { return SUCCESS; } +Status TbeOpTask::UpdateRunInfo(const vector &input_desc, const vector &output_desc) { + GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc)); + // invoke OpParaCalculate + GELOGD("Start to invoke OpParaCalculate."); + optiling::OpRunInfo run_info; + auto ret = optiling::OpParaCalculate(*node_, run_info); + if (ret != GRAPH_SUCCESS) { + GELOGE(FAILED, "Failed to invoke OpParaCalculate. ret = %u", ret); + return FAILED; + } + SetWorkspaceSizes(run_info.workspaces); + block_dim_ = run_info.block_dim; + tiling_data_ = run_info.tiling_data.str(); + GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu", block_dim_, + tiling_data_.size()); + return SUCCESS; +} + +Status TbeOpTask::UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor) { + int64_t storage_format_val = static_cast(FORMAT_RESERVED); + (void)AttrUtils::GetInt(src_tensor, ge::ATTR_NAME_STORAGE_FORMAT, storage_format_val); + auto storage_format = static_cast(storage_format_val); + if (storage_format == FORMAT_RESERVED) { + GELOGD("Storage format not set. 
update shape to [%s], and original shape to [%s]", + src_tensor.GetShape().ToString().c_str(), src_tensor.GetOriginShape().ToString().c_str()); + dst_tensor.SetShape(src_tensor.GetShape()); + dst_tensor.SetOriginShape(src_tensor.GetOriginShape()); + } else { + std::vector storage_shape; + if (!AttrUtils::GetListInt(src_tensor, ge::ATTR_NAME_STORAGE_SHAPE, storage_shape)) { + GELOGE(PARAM_INVALID, "Failed to get storage_shape while storage_format was set"); + return PARAM_INVALID; + } + + GELOGD("Storage format set. update shape to [%s], and original shape to [%s]", + GeShape(storage_shape).ToString().c_str(), src_tensor.GetShape().ToString().c_str()); + dst_tensor.SetShape(GeShape(std::move(storage_shape))); + dst_tensor.SetOriginShape(src_tensor.GetShape()); + } + + return SUCCESS; +} + +Status TbeOpTask::UpdateNodeByShape(const vector &input_desc, const vector &output_desc) { + auto op_desc = node_->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + // Set runtime shape to node + for (size_t i = 0; i < input_desc.size(); ++i) { + auto tensor_desc = op_desc->MutableInputDesc(i); + auto &runtime_tensor_desc = input_desc[i]; + GE_CHECK_NOTNULL(tensor_desc); + GE_CHK_STATUS_RET(UpdateTensorDesc(runtime_tensor_desc, *tensor_desc)); + } + + for (size_t i = 0; i < output_desc.size(); ++i) { + auto tensor_desc = op_desc->MutableOutputDesc(i); + auto &runtime_tensor_desc = output_desc[i]; + GE_CHECK_NOTNULL(tensor_desc); + GE_CHK_STATUS_RET(UpdateTensorDesc(runtime_tensor_desc, *tensor_desc)); + } + + return SUCCESS; +} + +void TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size) { + node_ = node; + tiling_buffer_ = tiling_buffer; + max_tiling_size_ = max_tiling_size; +} + +Status TbeOpTask::LaunchKernel(const vector &inputs, const vector &outputs, + const vector &workspaces, rtStream_t stream) { + GELOGD("[%s] Start to launch kernel", node_->GetName().c_str()); + std::vector args; + args.insert(args.end(), inputs.begin(), 
inputs.end()); + args.insert(args.end(), outputs.begin(), outputs.end()); + args.insert(args.end(), workspaces.begin(), workspaces.end()); + + if (tiling_buffer_ != nullptr) { + GELOGD("[%s] Start to copy tiling info. size = %zu", node_->GetName().c_str(), tiling_data_.size()); + GE_CHK_RT_RET(rtMemcpyAsync(tiling_buffer_, max_tiling_size_, tiling_data_.data(), tiling_data_.size(), + RT_MEMCPY_HOST_TO_DEVICE_EX, stream)); + + args.emplace_back(tiling_buffer_); + } + + if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) { + GELOGE(INTERNAL_ERROR, "[%s] Failed to update kernel args.", node_->GetName().c_str()); + return INTERNAL_ERROR; + } + + GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); + GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream)); + GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str()); + return SUCCESS; +} + AiCpuTask::~AiCpuTask() { if (args_ != nullptr) { (void)rtFree(args_); diff --git a/src/ge/single_op/task/op_task.h b/src/ge/single_op/task/op_task.h index fd4cc96f..3e261b3f 100644 --- a/src/ge/single_op/task/op_task.h +++ b/src/ge/single_op/task/op_task.h @@ -19,10 +19,12 @@ #include #include +#include #include "runtime/stream.h" #include "common/ge_inner_error_codes.h" #include "graph/op_kernel_bin.h" +#include "graph/node.h" namespace ge { enum OpTaskType { @@ -37,7 +39,20 @@ class OpTask { OpTask() = default; virtual ~OpTask() = default; virtual Status LaunchKernel(rtStream_t stream) = 0; + virtual Status UpdateRunInfo(const vector &input_desc, const vector &output_desc) { + return UNSUPPORTED; + } + virtual Status LaunchKernel(const std::vector &inputs, const std::vector &outputs, + const std::vector &workspaces, rtStream_t stream) { + return UNSUPPORTED; + } virtual OpTaskType GetOpTaskType() = 0; + + const vector &GetWorkspaceSizes() const; + void SetWorkspaceSizes(const vector &workspace_sizes); + + private: 
+ std::vector workspace_sizes_; }; class TbeOpTask : public OpTask { @@ -48,18 +63,33 @@ class TbeOpTask : public OpTask { void SetSmDesc(void *sm_desc); void SetStubFunc(const std::string &name, const void *stub_func); - void SetKernelArgs(void *args, size_t arg_size, uint32_t block_dim); + void SetKernelArgs(std::unique_ptr &&args, size_t arg_size, uint32_t block_dim); + + Status UpdateRunInfo(const vector &input_desc, const vector &output_desc) override; + + Status LaunchKernel(const vector &inputs, const vector &outputs, const vector &workspaces, + rtStream_t stream) override; + const void *GetArgs() const; size_t GetArgSize() const; const std::string &GetStubName() const; + void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); private: + static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor); + Status UpdateNodeByShape(const vector &input_desc, const vector &output_desc); + const void *stub_func_ = nullptr; - void *args_ = nullptr; + std::unique_ptr args_; size_t arg_size_ = 0; uint32_t block_dim_ = 1; void *sm_desc_ = nullptr; std::string stub_name_; + + void *tiling_buffer_ = nullptr; + uint32_t max_tiling_size_ = 0; + std::string tiling_data_; + NodePtr node_; }; class AiCpuTask : public OpTask { diff --git a/src/ge/single_op/task/tbe_task_builder.cc b/src/ge/single_op/task/tbe_task_builder.cc index a422fb96..23c023fd 100644 --- a/src/ge/single_op/task/tbe_task_builder.cc +++ b/src/ge/single_op/task/tbe_task_builder.cc @@ -17,20 +17,18 @@ #include "single_op/task/tbe_task_builder.h" #include -#include #include -#include "common/helper/model_helper.h" -#include "framework/common/debug/ge_log.h" #include "graph/load/new_model_manager/model_utils.h" #include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/task_info/task_info.h" #include "graph/manager/graph_var_manager.h" #include "runtime/rt.h" #include "single_op/task/build_task_utils.h" namespace ge { namespace { 
+constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; +constexpr char const *kAttrOpParamSize = "op_para_size"; std::mutex g_reg_mutex; inline void GetKernelName(const OpDescPtr &op_desc, std::string &kernel_name) { @@ -85,9 +83,11 @@ bool KernelBinRegistry::AddKernel(const std::string &stub_name, const KernelHold return ret.second; } -TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const OpDescPtr &op_desc, - const domi::KernelDef &kernel_def) - : op_desc_(op_desc), kernel_def_(kernel_def), stub_name_(model_name + "/" + op_desc->GetName() + "_tvmbin") {} +TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def) + : node_(node), + op_desc_(node->GetOpDesc()), + kernel_def_(kernel_def), + stub_name_(model_name + "/" + node->GetName() + "_tvmbin") {} Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, const SingleOpModelParam ¶m) const { @@ -246,17 +246,11 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m } Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m) { - uint8_t *args = nullptr; size_t arg_size = kernel_def_.args_size(); - auto rtRet = rtMallocHost(reinterpret_cast(&args), arg_size); - if (rtRet != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "rtMallocHost failed, size = %zu, ret = %d", arg_size, static_cast(rtRet)); - return RT_FAILED; - } - - task.SetKernelArgs(args, arg_size, kernel_def_.block_dim()); + auto args = std::unique_ptr(new (std::nothrow) uint8_t[arg_size]); + GE_CHECK_NOTNULL(args); - rtRet = rtMemcpy(args, arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); + auto rtRet = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); if (rtRet != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast(rtRet)); return RT_FAILED; @@ -266,16 +260,23 @@ Status 
TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & const auto *args_offset_tmp = reinterpret_cast(context.args_offset().data()); uint16_t offset = *args_offset_tmp; - // copy args - std::vector tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); - void *src_addr = reinterpret_cast(tensor_device_addr_vec.data()); - uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); - rtRet = rtMemcpy(args + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); - if (rtRet != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "rtMemcpy addresses failed, ret = %d", static_cast(rtRet)); - return RT_FAILED; + bool is_dynamic = false; + (void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic); + if (is_dynamic) { + GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task)); + } else { + // copy args + std::vector tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); + void *src_addr = reinterpret_cast(tensor_device_addr_vec.data()); + uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); + rtRet = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); + if (rtRet != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "rtMemcpy addresses failed, ret = %d", static_cast(rtRet)); + return RT_FAILED; + } } + task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim()); return SUCCESS; } @@ -303,4 +304,23 @@ Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶ task.SetStubFunc(stub_name_, stub_func); return SUCCESS; } + +Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { + GELOGD("Start alloc tiling data of node %s.", op_desc_->GetName().c_str()); + int64_t max_size = -1; + (void)AttrUtils::GetInt(op_desc_, kAttrOpParamSize, max_size); + GELOGD("Got op param size by key: %s, ret = %ld", kAttrOpParamSize, max_size); + if (max_size <= 0) { + GELOGE(PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc_->GetName().c_str(), max_size); 
+ return PARAM_INVALID; + } + + void *tiling_buffer = nullptr; + GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast(max_size), RT_MEMORY_HBM)); + GE_CHECK_NOTNULL(tiling_buffer); + GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); + + task.EnableDynamicSupport(node_, tiling_buffer, static_cast(max_size)); + return SUCCESS; +} } // namespace ge diff --git a/src/ge/single_op/task/tbe_task_builder.h b/src/ge/single_op/task/tbe_task_builder.h index 5e0965bf..7c5f8054 100644 --- a/src/ge/single_op/task/tbe_task_builder.h +++ b/src/ge/single_op/task/tbe_task_builder.h @@ -65,12 +65,13 @@ class KernelBinRegistry { class TbeTaskBuilder { public: - TbeTaskBuilder(const std::string &model_name, const OpDescPtr &op_desc, const domi::KernelDef &kernel_def); + TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def); ~TbeTaskBuilder() = default; Status BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m); private: + Status InitTilingInfo(TbeOpTask &task); Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m); Status GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const; @@ -82,7 +83,8 @@ class TbeTaskBuilder { static Status DoRegisterFunction(void *bin_handle, const char *stub_name, const char *kernel_name); - const OpDescPtr &op_desc_; + const NodePtr node_; + const OpDescPtr op_desc_; const domi::KernelDef &kernel_def_; const std::string stub_name_; }; diff --git a/tests/st/resnet50/common.cc b/tests/st/resnet50/common.cc old mode 100755 new mode 100644 diff --git a/tests/ut/ge/graph/passes/flow_ctrl_pass_unittest.cc b/tests/ut/ge/graph/passes/flow_ctrl_pass_unittest.cc old mode 100755 new mode 100644 diff --git a/tests/ut/ge/graph/passes/folding_kernel/expanddims_kernel_unittest.cc b/tests/ut/ge/graph/passes/folding_kernel/expanddims_kernel_unittest.cc old mode 100755 new mode 100644 diff --git a/tests/ut/ge/graph/passes/merge_pass_unittest.cc 
b/tests/ut/ge/graph/passes/merge_pass_unittest.cc old mode 100755 new mode 100644 diff --git a/tests/ut/ge/graph/passes/net_output_pass_unittest.cc b/tests/ut/ge/graph/passes/net_output_pass_unittest.cc old mode 100755 new mode 100644 diff --git a/tests/ut/ge/graph/passes/snapshot_pass_unittest.cc b/tests/ut/ge/graph/passes/snapshot_pass_unittest.cc old mode 100755 new mode 100644 diff --git a/tests/ut/ge/single_op/single_op_manager_unittest.cc b/tests/ut/ge/single_op/single_op_manager_unittest.cc old mode 100755 new mode 100644 diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h index 35134faa..023812dd 100644 --- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h +++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h @@ -37,6 +37,7 @@ enum FWKAdptAPIRetCode { FWK_ADPT_SESSION_NOT_EXIST = 10, // session id not exist FWK_ADPT_SESSION_ALREADY_EXIST = 11, // session id alread exist for create session FWK_ADPT_NATIVE_END_OF_SEQUENCE = 12, // end of sequence + FWK_ADPT_EXTEND_TYPE_NOT_EXIST = 13, // extend info type not exist FWK_ADPT_UNKNOWN_ERROR = 99 // unknown error code }; @@ -55,9 +56,17 @@ enum FWKTaskExtInfoType { FWK_ADPT_EXT_SHAPE_TYPE = 0, FWK_ADPT_EXT_INPUT_SHAPE, FWK_ADPT_EXT_OUTPUT_SHAPE, + FWK_ADPT_EXT_UPDATE_ADDR, FWK_ADPT_EXT_INVALID }; +enum FWKExtUpdateAddrType { + FWK_ADPT_UPDATE_NULL = 0, + FWK_ADPT_UPDATE_INPUT, + FWK_ADPT_UPDATE_OUTPUT, + FWK_ADPT_UPDATE_INPUT_OUTPUT +}; + // API Parameter Structure struct StrFWKKernel { FWKOperateType opType; diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index 378eee38..1022880f 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -1100,12 +1100,6 @@ 
REG_OP(SqrtGrad) .OUTPUT(z, TensorType(UnaryDataType)) .OP_END_FACTORY_REG(SqrtGrad) -REG_OP(Multiply) - .INPUT(x, TensorType({DT_FLOAT,DT_UINT8,DT_INT8,DT_UINT16,DT_INT16,DT_INT32,DT_INT64,DT_DOUBLE,DT_FLOAT16})) - .INPUT(y, TensorType({DT_FLOAT,DT_UINT8,DT_INT8,DT_UINT16,DT_INT16,DT_INT32,DT_INT64,DT_DOUBLE,DT_FLOAT16})) - .OUTPUT(z, TensorType({DT_FLOAT,DT_UINT8,DT_INT8,DT_UINT16,DT_INT16,DT_INT32,DT_INT64,DT_DOUBLE,DT_FLOAT16})) - .OP_END_FACTORY_REG(Multiply) - /** *@brief Returns x + y element-wise. *@par Inputs: @@ -2861,22 +2855,19 @@ REG_OP(SquareSumAll) *@brief Confuse broadcast, addn and mul. *@par Inputs: -*Five inputs, including: -* @li x1: A Tensor. Must be one of the following types:int32 float16, float32. +*Three inputs, including: +* @li x1: A Tensor. Must be one of the following types:int32, int16, float16, float32. * @li x2: A Tensor of the same type as "x1". * @li x3: A Tensor of the same type as "x1". *@par Outputs: -*@li y: A Tensor. Has the same type as "x1". - -*@par Third-party framework compatibility: -* Compatible with the TensorFlow operator LRN. +* y: A Tensor. Has the same type as "x1". */ REG_OP(FusedMulAddN) - .INPUT(x1, TensorType::NumberType()) - .INPUT(x2, TensorType::NumberType()) - .INPUT(x3, TensorType::NumberType()) - .OUTPUT(y, TensorType::NumberType()) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) + .INPUT(x3, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) .OP_END_FACTORY_REG(FusedMulAddN) /** diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index 9b3694f1..59b99841 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -934,7 +934,6 @@ REG_OP(EncodeJpeg) /** *@brief PNG-encode an image. - *@par Inputs: *Input image must be unit8 or uint16 type. 
Inputs include: \n *image: is a 3-D uint8 or uint16 Tensor of shape [height, width, channels] \n @@ -992,6 +991,40 @@ REG_OP(ResizeBilinearV2D) .OP_END_FACTORY_REG(ResizeBilinearV2D) /** +*@brief Resizes "images" to "size" using bilinear interpolation and keep ration at the time. + +*@par Inputs: +* One input: +*images: An NC1HWC0 Tensor. \n +* Must be one of the following types: float16, float32. + +*@par Attributes: +*@li min_dimension: A required int32 attribute for the min dimension for the images. +* No default value. +*@li max_dimension: A required int32 attribute for the max dimension for the images. +* No default value. +*@li align_corners: An optional bool. If "true", the centers of the corner +* pixels of the input and output tensors are aligned. Defaults to "false". +*@li half_pixel_centers: indicates if the offset coordinates are normalized +* Defaults to "false". + +*@par Outputs: +*y: A Tensor with type float32 and the same format as input "images". + +*@attention Constraints: +* The input "images" must be a tensor of 5 elements: images[2] <= 2048, \n +images[3] <= 2048. +*/ +REG_OP(KeepRationResizeBilinear) + .INPUT(images, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(min_dimension, Int) + .REQUIRED_ATTR(max_dimension, Int) + .ATTR(align_corners, Bool, false) + .ATTR(half_pixel_centers, Bool, false) + .OP_END_FACTORY_REG(KeepRationResizeBilinear) + +/** *@brief Resizes "images" to "size" using nearest neighbor interpolation. 
*@par Inputs: @@ -1224,6 +1257,16 @@ REG_OP(CombinedNonMaxSuppression) .ATTR(clip_boxes, Bool, true) .OP_END_FACTORY_REG(CombinedNonMaxSuppression) +REG_OP(SpatialTransformerD) + .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) + .OPTIONAL_INPUT(theta, TensorType({DT_FLOAT,DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16})) + .ATTR(output_size, ListInt, {-1, -1}) + .ATTR(default_theta, ListFloat, {}) + .ATTR(align_corners, Bool, false) + .ATTR(use_default_theta, ListBool, {}) + .OP_END_FACTORY_REG(SpatialTransformerD) + } // namespace ge #endif // GE_OP_MAGE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/internal_ops.h b/third_party/fwkacllib/inc/ops/internal_ops.h index e3caa45f..8c261382 100644 --- a/third_party/fwkacllib/inc/ops/internal_ops.h +++ b/third_party/fwkacllib/inc/ops/internal_ops.h @@ -34,7 +34,6 @@ namespace ge { *@par Outputs: *The output is dynamic for attribute func_name. */ - REG_OP(AssistHelp) .DYNAMIC_INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE })) @@ -43,6 +42,20 @@ REG_OP(AssistHelp) . REQUIRED_ATTR (func_name, String) . OP_END_FACTORY_REG(AssistHelp) +/** +*@brief aicpu cache help for lhisi cache flush. + +*@par Inputs: +*The input is dynamic for attribute func_name \n + +*@par Outputs: +*The output is dynamic for attribute func_name. 
+*/ +REG_OP(CacheUpdate) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(x, TensorType::BasicType()) + .OP_END_FACTORY_REG(CacheUpdate) + } // namespace ge #endif // GE_OP_INTERNAL_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index 7cfd762f..7cb24ee7 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -167,50 +167,6 @@ REG_OP(BatchMatMul) .ATTR(adj_x2, Bool, false) .OP_END_FACTORY_REG(BatchMatMul) -REG_OP(MeanCCE) - .INPUT(x, TensorType::ALL()) - .INPUT(indices, TensorType::ALL()) - .OUTPUT(y, TensorType::ALL()) - .ATTR(keep_dims, Bool, false) - .ATTR(value1, ListInt, {}) - .ATTR(mode, Int, 3) // 0:max pooling or 1:avg pooling - .ATTR(pad_mode, Int, 0) - .ATTR(global_pooling, Bool, true) // tensorflow have no attr, set default value - .ATTR(window, ListInt, {1,1}) // kernel size - .ATTR(pad, ListInt, {0,0,0,0}) // pad size - .ATTR(stride, ListInt, {1,1}) // stride size - .ATTR(ceil_mode, Int, 0) - .ATTR(data_mode, Int, 1) - .ATTR(nan_opt, Int, 0) - .ATTR(fomart, Int, 0) - .OP_END_FACTORY_REG(MeanCCE) - -REG_OP(MeanGrad) - .INPUT(x, TensorType::ALL()) - .OUTPUT(y, TensorType::ALL()) - .ATTR(mode, Int, 1) // 0:max pooling or 1:avg pooling - .ATTR(pad_mode, Int, 0) - .ATTR(global_pooling, Bool, false) - .ATTR(window, ListInt, {1,1}) // kernel size - .ATTR(pad, ListInt, {0,0,0,0}) // pad size - .ATTR(stride, ListInt, {1,1}) // stride size - .ATTR(ceil_mode, Int, 0) - .ATTR(data_mode, Int, 1) - .ATTR(nan_opt, Int, 0) - .ATTR(mean_grad_output_shape_value, ListInt, {1,1,1,1}) - .ATTR(mean_grad_output_shape_format, Int, 1) //must be NHWC - .OP_END_FACTORY_REG(MeanGrad) - -REG_OP(MatMulCCE) - .INPUT(x1, TensorType({DT_FLOAT})) - .INPUT(x2, TensorType({DT_FLOAT})) - .OPTIONAL_INPUT(x3, TensorType({DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT})) - .ATTR(transpose_a, Bool, false) - .ATTR(transpose_b, Bool, false) 
- .ATTR(has_bias, Bool, false) - .OP_END_FACTORY_REG(MatMulCCE) - /** *@brief Computes half the L2 norm of a tensor without the sqrt. @@ -673,8 +629,9 @@ REG_OP(DiagPart) *@par Attributes: *@li num_output: Reserved. -*@li transpose: A bool, specifying whether to transpose, either "true" or "false". Defaults to "false". -*@li axis: Optional. A int. 1 or 2. +*@li transpose: A bool, specifying weight whether to transpose, either "true" or "false". Defaults to "false". +*@li axis: Optional. A int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1. + * The product of the subsequent dimensions starting form first dimension or the second dimension is "K". *@li offset_x: Reserved. *@par Outputs: diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h index 39aaa993..296dd63c 100644 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -22,95 +22,6 @@ namespace ge { /** -*@brief A fusion operator for batchnorm. - -*@par Inputs: -*Ten inputs, including: -* @li x: A Tensor. Must be one of the following types: float32. -* @li scale: A Tensor. Must be one of the following types: float32. -* @li b: A Tensor. Must be one of the following types: float32. -* @li mean: A Tensor. Must be one of the following types: float32. -* @li variance: A Tensor. Must be one of the following types: float32. - -*@par Attributes: -* @li mode: A Tensor. Must be one of the following types: int. defaults: 1. -* @li epsilon: A Tensor. Must be one of the following types: float32. Defaults to 0.000001. -* @li momentum: A Tensor. Must be one of the following types: float32. Defaults to 0.9. -* @li is_training: A Tensor. Must be one of the following types: bool. Defaults to true. -* @li is_training_fusion: A Tensor. Must be one of the following types: bool. Defaults to true. -* @li moving_average_fraction: A Tensor. Must be one of the following types: float32. 
Defaults to 0.00300002098. - -*@par Outputs: -*Three outputs, including: -* @li y: A Tensor. Must be one of the following types: float32. -* @li running_mean: A Tensor. Must be one of the following types: float32. -* @li running_variance: A Tensor. Must be one of the following types: float32. -* @li save_mean: A Tensor. Must be one of the following types: float32. -* @li save_inv_variance: A Tensor. Must be one of the following types: float32. -* @li save_inv_variance1: A Tensor. Must be one of the following types: float32. - -*/ -REG_OP(FusedBatchNorm) - .INPUT(x, TensorType{DT_FLOAT}) - .INPUT(scale, TensorType{DT_FLOAT}) - .INPUT(b, TensorType{DT_FLOAT}) - .INPUT(mean, TensorType{DT_FLOAT}) - .INPUT(variance, TensorType{DT_FLOAT}) - .OUTPUT(y, TensorType{DT_FLOAT}) - .OUTPUT(running_mean, TensorType{DT_FLOAT}) - .OUTPUT(running_variance, TensorType{DT_FLOAT}) - .OUTPUT(save_mean, TensorType{DT_FLOAT}) - .OUTPUT(save_inv_variance, TensorType{DT_FLOAT}) - .OUTPUT(save_inv_variance1, TensorType{DT_FLOAT}) - .ATTR(mode, Int, 1) - .ATTR(epsilon, Float, 1e-5f) - .ATTR(momentum, Float, 0.9) - .ATTR(is_training, Bool, true) - .ATTR(is_training_fusion, Bool, true) - .ATTR(moving_average_fraction, Float, 0.00300002098) - .OP_END_FACTORY_REG(FusedBatchNorm) - -/** -*@brief A fusion operator for batchnorm. - -*@par Inputs: -*Ten inputs, including: -* @li dy: A Tensor. Must be one of the following types: float32. -* @li x: A Tensor. Must be one of the following types: float32. -* @li scale: A Tensor. Must be one of the following types: float32. -* @li save_mean: A Tensor. Must be one of the following types: float32. -* @li save_inv_variance: A Tensor. Must be one of the following types: float32. -* @li save_inv_variance1: A Tensor. Must be one of the following types: float32. - -*@par Attributes: -* @li epsilon: A Tensor. Must be one of the following types: float32. Defaults to 0.0. -* @li momentum: A Tensor. Must be one of the following types: float32. Defaults to 0.0. 
- -*@par Outputs: -*Three outputs, including: -* @li dx: A Tensor. Must be one of the following types: float32. -* @li bn_scale: A Tensor. Must be one of the following types: float32. -* @li bn_bias: A Tensor. Must be one of the following types: float32. - -*@par Third-party framework compatibility -* Compatible with the L2 scenario of PyTorch operator Normalize. -*/ - -REG_OP(FusedBatchNormGrad) - .INPUT(dy, TensorType{DT_FLOAT}) - .INPUT(x, TensorType{DT_FLOAT}) - .INPUT(scale, TensorType{DT_FLOAT}) - .INPUT(save_mean, TensorType{DT_FLOAT}) - .INPUT(save_inv_variance, TensorType{DT_FLOAT}) - .INPUT(save_inv_variance1, TensorType{DT_FLOAT}) - .OUTPUT(dx, TensorType{DT_FLOAT}) - .OUTPUT(bn_scale, TensorType{DT_FLOAT}) - .OUTPUT(bn_bias, TensorType{DT_FLOAT}) - .ATTR(epsilon, Float, 0.0) - .ATTR(momentum, Float, 0.0) - .OP_END_FACTORY_REG(FusedBatchNormGrad) - -/** *@brief Normalizes elements of a specific dimension of eigenvalues (L2). *@par Inputs: diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index 5818e14b..e9180332 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -312,53 +312,6 @@ REG_OP(DepthwiseConv2D) .ATTR(offset_x, Int, 0) .OP_END_FACTORY_REG(DepthwiseConv2D) -REG_OP(Conv2DCCE) - .INPUT(x, TensorType{DT_FLOAT}) // The input tensor - .INPUT(w, TensorType({DT_FLOAT, DT_INT8})) // The weight tensor ,If QuantType =1 ,shall use type""tensor(int8) - .OPTIONAL_INPUT(b, TensorType{DT_FLOAT}) // Optional 1D bias to be added to the convolution, has size of M. 
- .OUTPUT(y, TensorType{DT_FLOAT}) // The output tensor - .ATTR(mode, Int, 1) - .ATTR(group, Int, 1) // number of groups input channels and output channels are divided into - .ATTR(num_output, Int, 0) // number of output tensor - .ATTR(pad, ListInt, {0, 0, 0, 0}) // Padding for the beginning and ending along each axis - .ATTR(kernel, ListInt, {0, 0}) - .ATTR(stride, ListInt, {1, 1}) // Stride along each axis. - .ATTR(dilation, ListInt, {1, 1}) // dilation value along each axis of the filter. - .ATTR(pad_mode, Int, 0) // pad mode, 0:NOTSET, 1:SAME_UPPER, SAME_LOWER or 2:VALID.defaul default value is 0:NOTSET - .ATTR(algo, Int, 2) - .OP_END_FACTORY_REG(Conv2DCCE) - -REG_OP(Conv2DBackpropFilterCCE) - .INPUT(x, TensorType{DT_FLOAT}) - .INPUT(filter_sizes, TensorType{DT_INT8}) - .INPUT(out_backprop, TensorType{DT_FLOAT}) - .OUTPUT(y, TensorType{DT_FLOAT}) - .ATTR(conv_grad_filter_output_shape, ListInt, {0, 0, 0, 0}) - .ATTR(mode, Int, 1) - .ATTR(group, Int, 1) - .ATTR(pad, ListInt, {0, 0, 0, 0}) - .ATTR(stride, ListInt, {1, 1}) - .ATTR(dilation, ListInt, {1, 1}) - .ATTR(padding, Int, 0) //pad_mode:same valid - .ATTR(algo, Int, 0) - .OP_END_FACTORY_REG(Conv2DBackpropFilterCCE) - -REG_OP(Conv2DBackpropInputCCE) - .INPUT(input_sizes, TensorType{DT_INT8}) - .INPUT(filter, TensorType{DT_FLOAT}) - .INPUT(out_backprop, TensorType{DT_FLOAT}) - .OUTPUT(output, TensorType{DT_FLOAT}) - .ATTR(conv_grad_input_output_shape, ListInt, {0, 0, 0, 0}) - .ATTR(mode, Int, 1) - .ATTR(format, Int, 0) - .ATTR(group, Int, 1) - .ATTR(pad_mode, Int, 0) - .ATTR(stride, ListInt, {1, 1}) - .ATTR(dilation, ListInt, {1, 1}) - .ATTR(pad, ListInt, {0, 0, 0, 0}) - .ATTR(algo, Int, 0) - .OP_END_FACTORY_REG(Conv2DBackpropInputCCE) - /** *@brief Performs the the backward operation for "BiasAdd" on the "bias" tensor. 
* It accumulates all the values from out_backprop into the feature @@ -581,8 +534,6 @@ REG_OP(Conv2DBackpropFilterD) | |---------|---------|---------|----------|-------- | | float32 | float32 | float32 | _ | float32 | |---------|---------|---------|----------|-------- - | | float64 | float64 | float64 | _ | float64 - | |---------|---------|---------|----------|-------- | | int8 | int8 | int32 | int8 | int32 -----------|---------|---------|---------|----------|-------- |Format | NCHW | NCHW | ND | ND | NCHW @@ -602,7 +553,7 @@ REG_OP(Conv2DBackpropFilterD) * for dilated convolution. Has the same dimension order and value as "strides". * @li groups: Number of blocked connections from input channels to output * channels. Input channels and output channels must both be divisible by -* "groups".Type is int32. Must be set to 1. +* "groups".Type is int32. * @li offset_x: An optional integer for quantized convolution. Type is int32. Defaults to "0". * @li data_format: An optional string from: "NHWC", "NCHW". Specifying the * data format of the input and output images. Type is string. Defaults to "NHWC". Reserved. @@ -654,11 +605,11 @@ REG_OP(Conv2DBackpropFilterD) *@li Compatible with the Caffe operator 2D "Convolution". 
*/ REG_OP(Conv2D) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) - .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) @@ -684,36 +635,46 @@ REG_OP(Conv2DCompress) /** *@brief Computes a 3D convolution given 5D "x" and "filter" tensors. -*@par Inputs: -*@li x: A 5D tensor. Must be one of the following types: float16, float32, float64. The format is NCDHW or NDHWC. -*@li filter: A 5D tensor of the same type as "x". The format is NCDHW, NDHWC or DHWCN. -*@li bias: An optional 1D tensor of the same type as "x". + *@par Inputs: + * @li x: A 5D tensor. Must be one of the following types: float16, float32, float64. The format is NCDHW or NDHWC. + * @li filter: A 5D tensor of the same type as "x". The format is NCDHW, NDHWC or DHWCN. + +*@par Optional input: + * @li bias: An optional 1D tensor of the same type as "x". + * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved. + +*@par Required Attributes: +* @li strides: A list of 5 ints. Specifies the stride of the sliding window for each dimension of "x". The N and C dimensions must be 1. Has the same format as "x". +* @li pads: A list of 6 ints. Supports only padding along the D, H and W dimensions in sequence of head, tail, top, bottom, left and right. *@par Attributes: -*@li strides: A list of 5 ints. Specifies the stride of the sliding window for each dimension of "x". The N and C dimensions must be 1. 
Has the same format as "x". -*@li pads: A list of 6 ints. Supports only padding along the D, H and W dimensions in sequence of head, tail, top, bottom, left and right. -*@li data_format: An optional string from: "NDHWC", "NCDHW". Defaults to "NDHWC". Specify the data format of the input and output data. -*@li dilations: A list of 5 ints. Specifies the dilation factor for each dimension of "x". The N and C dimensions must be 1. Has the same format as "x". + * @li groups: Number of blocked connections from input channels to output channels. + * @li data_format: An optional string from: "NDHWC", "NCDHW". Defaults to "NDHWC". Specify the data format of the input and output data. + * @li dilations: A list of 5 ints. Specifies the dilation factor for each dimension of "x". The N and C dimensions must be 1. Has the same format as "x". + * @li offset_x: An optional int. Input offset, used for quantized inference. Defaults to 0. *@par Outputs: -*y: A Tensor. Has the same type as "x". + *y: A Tensor. Has the same type as "x". -*@attention Constraints:\n -*The image size after padding is greater than the filter size.\n +*@attention Constraints: + *The image size after padding is greater than the filter size. *@par Third-party framework compatibility -*@li Compatible with the TensorFlow operator conv3d. -*@li Compatible with the Caffe operator Convolution. + * @li Compatible with the TensorFlow operator conv3d. + * @li Compatible with the Caffe operator Convolution. 
*/ REG_OP(Conv3D) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) - .ATTR(data_format, String, "NDHWC") .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NDHWC") + .ATTR(offset_x, Int, 0) .OP_END_FACTORY_REG(Conv3D) /** @@ -723,28 +684,35 @@ REG_OP(Conv3D) * @li input_size: A Tensor of type int32, int64. An integer vector representing the shape of input, * where input is a 5-D tensor [batch, depth, height, width, channels] or [batch, channels, depth, height, width]. * @li filter: A Tensor. Must be one of the following types: float16, float32, float64. - * @li grads: A Tensor. Must have the same type as filter. 5-D with shape [batch, depth, out_height, out_width, out_channels] + * @li out_backprop: A Tensor. Must have the same type as filter. 5-D with shape [batch, depth, out_height, out_width, out_channels] * or [batch, out_channels, depth, out_height, out_width]. Gradients with respect to the output of the convolution. + +*@par Required Attributes: + * @li strides: A list of 5 ints. Specifies the stride of the sliding window for each dimension of "x". The N and C dimensions must be 1. Has the same format as "x". + * @li pads: A list of 6 ints. Supports only padding along the D, H and W dimensions in sequence of head, tail, top, bottom, left and right. + *@par Attributes: - * Four attributes: - * @li strides: A tuple/list of 3 integers. The stride of the sliding window for D/H/W dimension. 
- * @li pads: A tuple/list of 6 integers - * @li dilations: A tuple/list of 6 integers, The dilation factor for each dimension of input, now only support [1,1,1,1,1] + * Three attributes: + * @li groups: Number of blocked connections from input channels to output channels. * @li data_format: An optional string from: "NDHWC", "NCHWD". Defaults to "NDHWC". Specify the data format of the input and output data. + * @li dilations: A tuple/list of 6 integers, The dilation factor for each dimension of input, now only support [1,1,1,1,1] + *@par Outputs: * y: A Tensor. Has the same type as filter,and has same format as input_size + *@par Third-party framework compatibility * Compatible with Tensorflow's conv3d_backprop_input */ REG_OP(Conv3DBackpropInput) .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) - .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) - .ATTR(data_format, String, "NDHWC") .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NDHWC") .OP_END_FACTORY_REG(Conv3DBackpropInput) /** @@ -752,46 +720,56 @@ REG_OP(Conv3DBackpropInput) *@par Inputs: * Two inputs: * @li filter: A Tensor. Types is float16. - * @li grads: A Tensor. Must have the same type as filter. + * @li out_backprop: A Tensor. Must have the same type as filter. + +*@par Required Attributes: + *@li strides: A list of 5 ints. Specifies the stride of the sliding window for + each dimension of "x". The N and C dimensions must be 1. Has the same format as "x". + *@li pads: A list of 6 ints. Supports only padding along the D, H and W + dimensions in sequence of head, tail, top, bottom, left and right. + *@li input_size: A Tensor of type int32, int64. 
An integer vector representing the shape of input, + * where input is a 5-D tensor [batch, depth, height, width, channels] or [batch, channels, depth, height, width]. + *@par Attributes: - * Five attributes: - * @li input_size A Tensor of type int32. An integer vector representing the shape of input, - * @li strides: A tuple/list of 3 integers. The stride of the sliding window for D/H/W dimension. - * @li pads: A tuple/list of 4 integers - * @li dilations: A tuple/list of 5 integers, The dilation factor for each dimension of input, now only support [1,1,1,1,1] + * Three attributes: + * @li groups: Number of blocked connections from input channels to output channels. * @li data_format: An optional string from: "NDHWC", "NCHWD". Defaults to "NDHWC". Specify the data format of the input and output data. + * @li dilations: A tuple/list of 5 integers, The dilation factor for each dimension of input, now only support [1,1,1,1,1] *@par Outputs: * y: A Tensor. Has the same type as filter *@par Third-party framework compatibility * Compatible with Tensorflow's conv3d_backprop_input */ + + REG_OP(Conv3DBackpropInputD) .INPUT(filter, TensorType({DT_FLOAT16})) - .INPUT(grads, TensorType({DT_FLOAT16})) + .INPUT(out_backprop, TensorType({DT_FLOAT16})) .OUTPUT(y, TensorType({DT_FLOAT16})) .REQUIRED_ATTR(input_size, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) - .ATTR(data_format, String, "NDHWC") .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NDHWC") .OP_END_FACTORY_REG(Conv3DBackpropInputD) REG_OP(LSTM) - .INPUT(x, TensorType({DT_FLOAT16})) - .INPUT(cont, TensorType({DT_FLOAT32,DT_FLOAT16})) - .INPUT(w_x, TensorType({DT_FLOAT16})) - .INPUT(bias, TensorType({DT_FLOAT16,DT_FLOAT32,DT_INT16,DT_INT32})) - .INPUT(w_h, TensorType({DT_FLOAT16})) - .OPTIONAL_INPUT(x_static, TensorType({DT_FLOAT16})) - .OPTIONAL_INPUT(h_0, TensorType({DT_FLOAT16,DT_FLOAT32})) - .OPTIONAL_INPUT(c_0, 
TensorType({DT_FLOAT16,DT_FLOAT32})) - .OPTIONAL_INPUT(w_x_static, TensorType({DT_FLOAT16})) - .OUTPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(h_t, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(c_t, TensorType({DT_FLOAT16, DT_FLOAT})) - .ATTR(num_output, Int, 0) - .ATTR(expose_hidden, Bool, false) - .OP_END_FACTORY_REG(LSTM) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(cont, TensorType({DT_FLOAT32,DT_FLOAT16})) + .INPUT(w_x, TensorType({DT_FLOAT16})) + .INPUT(bias, TensorType({DT_FLOAT16,DT_FLOAT32,DT_INT16,DT_INT32})) + .INPUT(w_h, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(x_static, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(h_0, TensorType({DT_FLOAT16,DT_FLOAT32})) + .OPTIONAL_INPUT(c_0, TensorType({DT_FLOAT16,DT_FLOAT32})) + .OPTIONAL_INPUT(w_x_static, TensorType({DT_FLOAT16})) + .OUTPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(h_t, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(c_t, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(num_output, Int, 0) + .ATTR(expose_hidden, Bool, false) + .OP_END_FACTORY_REG(LSTM) /** *@brief Computes the gradients of convolution3D with respect to the filter @@ -851,6 +829,8 @@ REG_OP(Conv3DBackpropFilter) *@par Third-party framework compatibility * Compatible with Tensorflow's conv3d_backprop_filter */ + + REG_OP(Conv3DBackpropFilterD) .INPUT(x, TensorType({DT_FLOAT16})) .INPUT(out_backprop, TensorType({DT_FLOAT16})) @@ -862,5 +842,86 @@ REG_OP(Conv3DBackpropFilterD) .ATTR(groups, Int, 1) .ATTR(data_format, String, "NDHWC") .OP_END_FACTORY_REG(Conv3DBackpropFilterD) + +/** +*@brief Computes the transpose of convolution 3d with respect to the input. +*@par Inputs: + * Five inputs: + * @li input_size: A Tensor of type int32. An integer vector representing the shape of input + * @li x: A Tensor. + * @li filter: A Tensor. Types is float16. + * @li bias: An optional 1D tensor of the same type as "x". + * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved. 
+ +*@par Required Attributes: + * @li strides: A tuple/list of 3 integers. The stride of the sliding window for D/H/W dimension. + * @li pads: A tuple/list of 6 integers +*@par Attributes: + * Five attributes: + * @li groups: Number of blocked connections from input channels to output channels. + * @li dilations: A tuple/list of 5 integers, The dilation factor for each dimension of input, now only support [1,1,1,1,1] + * @li data_format: An optional string from: "NDHWC", "NCHWD". Defaults to "NDHWC". Specify the data format of the input and output data. + * @li output_padding: The size will be added in the output shape. + * @li offset_x: Input offset_x value +*@par Outputs: + * y: A Tensor. Has the same type as filter +*/ +REG_OP(Conv3DTranspose) + .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NDHWC") + .ATTR(output_padding, ListInt, {0, 0, 0, 0, 0}) + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(Conv3DTranspose) + +/** +*@brief Computes the transpose of convolution 3d with respect to the input. +*@par Inputs: + * Four inputs: + * @li x: A Tensor. + * @li filter: A Tensor. Types is float16. + * @li bias: An optional 1D tensor of the same type as "x". + * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved. + +*@par Required Attributes: + * @li input_size: A Tensor of type int32. An integer vector representing the shape of input + * @li strides: A tuple/list of 3 integers. The stride of the sliding window for D/H/W dimension. 
+ * @li pads: A tuple/list of 6 integers +*@par Attributes: + * Five attributes: + * @li dilations: A tuple/list of 5 integers, The dilation factor for each dimension of input, now only support [1,1,1,1,1] + * @li groups: Number of blocked connections from input channels to output channels. + * @li data_format: An optional string from: "NDHWC", "NCHWD". Defaults to "NDHWC". Specify the data format of the input and output data. + * @li output_padding: The size will be added in the output shape. + * @li offset_x: Input offset_x value +*@par Outputs: + * y: A Tensor. Has the same type as filter +*/ + + +REG_OP(Conv3DTransposeD) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(filter, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .OUTPUT(y, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(input_size, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NDHWC") + .ATTR(output_padding, ListInt, {0, 0, 0, 0, 0}) + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(Conv3DTransposeD) + } // namespace ge #endif // GE_OP_NN_CALCULATION_OPS_H diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index ceb92f7a..0a91e237 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -735,6 +735,65 @@ REG_OP(YoloV3DetectionOutputD) .OP_END_FACTORY_REG(YoloV3DetectionOutputD) /** +*@brief Performs YOLO V3 detection. + +*@par Inputs: +*16 Input, including: +*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput. \n +A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. 
+*@li imginfo: A float16, describing the image information including the required image height and width \n +and the actual image height and width. +*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed for the three Yolo outputs, respectively. + +*@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively. + +* +*@par Attributes: +*@li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" +*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. +*@li coords: Specifies the number of coordinate parameters. Must be 4. +*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80]. +*@li relative: An optional bool. Defaults to and must be "true". +*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. +*@li post_nms_topn: An optional int32. This attribute is reserved. +*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. +*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n +*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". +* +*@par Outputs: +*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn], describing the information of each output box. +* In output shape, 6 means x1, y1, x2, y2, score, label(class). 
Output by the number of box_out_num. +*@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. +* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 +* +*@attention Constraints:\n +*@li This operator applies only to the YOLO v3 network. +*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators. +*@see Yolo() +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. +*/ +REG_OP(YoloV3DetectionOutputV2) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT})) + .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT})) + .REQUIRED_ATTR(biases, ListFloat) + .ATTR(boxes, Int, 3) + .ATTR(coords, Int, 4) + .ATTR(classes, Int, 80) + .ATTR(relative, Bool, true) + .ATTR(obj_threshold, Float, 0.5) + .ATTR(post_nms_topn, Int, 512) + .ATTR(score_threshold, Float, 0.5) + .ATTR(iou_threshold, Float, 0.45) + .ATTR(pre_nms_topn, Int, 512) + .ATTR(N, Int, 10) + .ATTR(resize_origin_img_to_net, Bool, false) + .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(box_out_num, TensorType({DT_INT32})) + .OP_END_FACTORY_REG(YoloV3DetectionOutputV2) + +/** *@brief Spatial Pyramid Pooling, multi-level pooling. * Pooling out(n, sigma(c*2^i*2^i)) tensor, i in range[0,pyramid_height). @@ -1084,6 +1143,131 @@ REG_OP(DecodeWheelsTarget) .OUTPUT(boundary_encoded, TensorType({DT_FLOAT16})) .OP_END_FACTORY_REG(DecodeWheelsTarget) +/** +*@brief Computes nms for input boxes and score, support multiple batch and classes. +* will do clip to window, score filter, top_k, and nms + +*@par Inputs: +* Four inputs, including: \n +*@li boxes: boxes, a 4D Tensor of type float16 with +* shape (batch, num_anchors, num_classes, 4). 
"batch" indicates the batch size of image, +* and "num_anchors" indicates num of boxes, and "num_classes" indicates classes of detect. +* and the value "4" refers to "x0", "x1", "y0", and "y1". +*@li scores: boxes, a 4D Tensor of type float16 with +* shape (batch, num_anchors, num_classes). +*@li clip_window: window size, a 2D Tensor of type float16 with +* shape (batch, 4). 4" refers to "anchor_x0", "anchor_x1", "anchor_y0", and "anchor_y1". +*@li num_valid_boxes: valid boxes number for each batch, a 1D Tensor of type int32 with +* shape (batch,). + +*@par Attributes: +*@li score_threshold: A required attribute of type float32, specifying the score filter iou iou_threshold. +*@li iou_threshold: A required attribute of type float32, specifying the nms iou iou_threshold. +*@li max_size_per_class: A required attribute of type int, specifying the nms output num per class. +*@li max_total_size: A required attribute of type int, specifying the the nms output num per batch. +*@li change_coordinate_frame: A required attribute of type bool, whether to normalize coordinates after clipping. +*@li transpose_box: A required attribute of type bool, whether inserted transpose before this op. + +*@par Outputs: +*@li nmsed_boxes: A 3D Tensor of type float16 with shape (batch, max_total_size, 4), +* specifying the output nms boxes per batch. +*@li nmsed_scores: A 2D Tensor of type float16 with shape (N, 4), +* specifying the output nms score per batch. +*@li nmsed_classes: A 2D Tensor of type float16 with shape (N, 4), +* specifying the output nms class per batch. +*@li nmsed_num: A 1D Tensor of type float16 with shape (N, 4), specifying the valid num of nmsed_boxes. + +*@attention Constraints: +* Only computation of float16 data is supported. 
+*/ +REG_OP(BatchMultiClassNonMaxSuppression) + .INPUT(boxes, TensorType({DT_FLOAT16})) + .INPUT(scores, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(clip_window, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(num_valid_boxes, TensorType({DT_INT32})) + .OUTPUT(nmsed_boxes, TensorType({DT_FLOAT16})) + .OUTPUT(nmsed_scores, TensorType({DT_FLOAT16})) + .OUTPUT(nmsed_classes, TensorType({DT_FLOAT16})) + .OUTPUT(nmsed_num, TensorType({DT_INT32})) + .REQUIRED_ATTR(score_threshold, Float) + .REQUIRED_ATTR(iou_threshold, Float) + .REQUIRED_ATTR(max_size_per_class, Float) + .REQUIRED_ATTR(max_total_size, Float) + .ATTR(change_coordinate_frame, Bool, false) + .ATTR(transpose_box, Bool, false) + .OP_END_FACTORY_REG(BatchMultiClassNonMaxSuppression) + +/** +* @brief To absolute the bounding box. + +* @par Inputs: +* @li normalized_boxes: A 3D Tensor of type float16 or float32. +* @li shape_hw: A 1D Tensor of type int32. + +* @par Attributes: +* @li reversed_box: An optional bool, specifying the last two dims is "4,num" or +* "num,4", "true" for "4,num", "false" for "num,4". Defaults to "false". + +* @par Outputs: +* y: A Tensor. Has the same type and shape as "normalized_boxes". + +* @attention Constraints: +* "normalized_boxes"'s shape must be (batch,num,4) or (batch,4,num). +* "shape_hw"'s shape must be (4,) +*/ +REG_OP(ToAbsoluteBBox) + .INPUT(normalized_boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(shape_hw, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(reversed_box, Bool, false) + .OP_END_FACTORY_REG(ToAbsoluteBBox) + +/** +*@brief Computes Normalize bbox function. +* +*@par Inputs: +*Inputs include: +* @li boxes: A Tensor. Must be float16 or float32. +* @li shape_hw: A Tensor. Must be int32. +* +*@par Attributes: +* reversed_box: optional, bool. Defaults to "False" +* +*@par Outputs: +* y: A Tensor. Must have the same type and shape as boxes. 
+*/ +REG_OP(NormalizeBBox) + .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(shape_hw, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(reversed_box, Bool, false) + .OP_END_FACTORY_REG(NormalizeBBox) + +/** +*@brief Computes decode bboxv2 function. +* +*@par Inputs: +*Inputs include: +* @li boxes: A Tensor. Must be float16 or float32. +* @li anchors: A Tensor. Must be int32. +* +*@par Attributes: +* @li scales: optional, listfloat, . +* @li decode_clip: optional, float, threahold of decode process. +* @li reversed_boxes: optional, bool,. +* +*@par Outputs: +* y: A Tensor. Must have the same type as box_predictions. +*/ +REG_OP(DecodeBboxV2) + .INPUT(boxes, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(anchors, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(scales, ListFloat, {1.0, 1.0, 1.0, 1.0}) + .ATTR(decode_clip, Float, 0.0) + .ATTR(reversed_box, Bool, false) + .OP_END_FACTORY_REG(DecodeBboxV2) + } // namespace ge #endif // GE_OP_NN_DETECT_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index d18a4fa4..f5b20cdd 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -44,16 +44,6 @@ REG_OP(LogSoftmaxGrad) .ATTR(axis, ListInt, {-1}) .OP_END_FACTORY_REG(LogSoftmaxGrad) -REG_OP(SparseSoftmaxCrossEntropyWithLogitsCCE) - .INPUT(features, TensorType{DT_FLOAT}) - .INPUT(labels, TensorType{DT_FLOAT}) - .OUTPUT(out, TensorType{DT_FLOAT}) - .OUTPUT(non, TensorType{DT_FLOAT}) - .ATTR(cross_entropy_is_grad, Bool, 0) - .ATTR(cross_entropy_mode, Int, 1) - .ATTR(softmax_cross_entropy_lossscale_div_batch, Float, 1.0) - .OP_END_FACTORY_REG(SparseSoftmaxCrossEntropyWithLogitsCCE) - /** *@brief Computes sparse softmax cross entropy cost and gradients to backpropagate. 
@@ -330,22 +320,6 @@ REG_OP(LogSoftmaxV2) .ATTR(axes, ListInt, {-1}) .OP_END_FACTORY_REG(LogSoftmaxV2) -REG_OP(FusedBatchNormV2) - .INPUT(x, TensorType{DT_FLOAT}) /* Input data tensor from the previous operator"" */ - .INPUT(scale, TensorType{DT_FLOAT}) /* If spatial is true, the dimension of bias is (C) If spatial is false, the dimensions of scale are (C x D1 x ... x Dn)*/ - .INPUT(b, TensorType{DT_FLOAT}) /* If spatial is true, the dimension of bias is (C) If spatial is false, the dimensions of scale are (C x D1 x ... x Dn)*/ - .OPTIONAL_INPUT(mean, TensorType{DT_FLOAT}) /* If spatial is true, the dimension of the running mean (training) or the estimated mean (testing) is (C).If spatial is false, the dimensions of the running mean (training) or the estimated mean (testing) are (C x D1 x ... x Dn)*/ - .OPTIONAL_INPUT(variance, TensorType{DT_FLOAT}) /* If spatial is true, the dimension of the running variance(training) or the estimated variance (testing) is (C). If spatial is false, the dimensions of the running variance(training) or the estimated variance (testing) are (C x D1 x ... x Dn).*/ - .OUTPUT(y, TensorType{DT_FLOAT}) /* The output tensor of the same shape as X */ - .ATTR(momentum, Float, 0.9) // Factor used in computing the running mean and variance. - .ATTR(epsilon, Float, 1e-5f) // The epsilon value to use to avoid division by zero - .ATTR(mode, Int, 1) // 1 means using "CC_BATCHNORM_SPATIAL"; 0 means using "CC_BATCHNORM_PER_ACTIVATION"; only support 1 now - .ATTR(use_global_stats, Bool, true) - .ATTR(alpha, Float, 1) - .ATTR(beta, Float, 0) - .OP_END_FACTORY_REG(FusedBatchNormV2) - - /** *@brief Confuse mul, sum and sub. @@ -683,7 +657,7 @@ REG_OP(Scale) *@li alpha: An optional float32. A scaling factor, usually positive. * Defaults to "1.0". *@li beta: An optional float32. An exponent. Defaults to "0.75" for the caffe framework, Defaults to "0.5" for others. -*@li norm_region: An optional string. A mode option. "ACROSS_CHANNELS":0, "WITHIN_CHANNEL":1. 
Defaults to "ACROSS_CHANNELS". +*@li norm_region: An optional string. A mode option. "ACROSS_CHANNELS":0. Defaults to "ACROSS_CHANNELS". *@par Outputs: *y: A Tensor. Has the same data type and shape as "x". diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index 693e51d1..98c4b246 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -614,35 +614,7 @@ REG_OP(AvgPoolGradD) .OP_END_FACTORY_REG(AvgPoolGradD) -REG_OP(MaxPoolWithArgmaxCCE) - .INPUT(x, TensorType::ALL()) - .OUTPUT(y, TensorType::ALL()) - .OUTPUT(argmax, TensorType::ALL()) - .ATTR(mode, Int, 0) - .ATTR(pad_mode, Int, 0) - .ATTR(window, ListInt, {1,1}) - .ATTR(stride, ListInt, {1,1}) - .ATTR(pad, ListInt, {0,0,0,0}) - .ATTR(ceil_mode, Int, 0) - .ATTR(data_mode, Int, 1) - .ATTR(nan_opt, Int, 0) - .OP_END_FACTORY_REG(MaxPoolWithArgmaxCCE) - -REG_OP(MaxPoolGradWithArgmaxCCE) - .INPUT(x, TensorType::ALL()) - .INPUT(grad,TensorType::ALL()) - .INPUT(arg,TensorType::ALL()) - .OUTPUT(output,TensorType::ALL()) - .ATTR(mode, Int, 0) - .ATTR(max_pool_grad_output_shape, ListInt, {0,0,0,0}) - .ATTR(pad_mode, Int, 0) - .ATTR(window, ListInt, {1,1}) - .ATTR(stride, ListInt, {1,1}) - .ATTR(pad, ListInt, {0,0,0,0}) - .ATTR(ceil_mode, Int, 0) - .ATTR(data_mode, Int, 1) - .ATTR(nan_opt, Int, 0) - .OP_END_FACTORY_REG(MaxPoolGradWithArgmaxCCE) + /** *@brief :upsample the layer diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 368054f5..cc17103c 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -307,16 +307,6 @@ REG_OP(ApplyMomentum) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ApplyMomentum) -REG_OP(ApplyMomentumCCE) - .INPUT(var, TensorType::NumberType()) - .INPUT(accum, TensorType::NumberType()) - .INPUT(lr, TensorType::NumberType()) - .INPUT(grad, 
TensorType::NumberType()) - .INPUT(momentum, TensorType::NumberType()) - .OUTPUT(var, TensorType::NumberType()) - .ATTR(use_nesterov, Bool, false) - .ATTR(use_locking, Bool, false) - .OP_END_FACTORY_REG(ApplyMomentumCCE) /** *@brief Updates "var" according to the momentum scheme. Set use_nesterov = True if you diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index a8aed058..8cf9f342 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -880,22 +880,22 @@ REG_OP(GNTrainingReduce) * Eight inputs, including: (NCHW NHWC supported) *@li x: A Tensor of type float16 or float32. *@li sum: A 5D Tensor of type float32, -shape is [N, G, D, 1, 1] for NCHW, [N, 1, 1, G, D] for NHWC +shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC for the output of operator GNTrainingReduce. *@li square_sum: A 5D Tensor of type float32, -shape is [N, G, D, 1, 1] for NCHW, [N, 1, 1, G, D] for NHWC +shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC for the output of operator GNTrainingReduce. *@li scale: A 5D Tensor of type float32, -shape is [1, G, D, 1, 1] for NCHW, [1, 1, 1, G, D] for NHWC +shape is [1, G, 1, 1, 1] for NCHW, [1, 1, 1, G, 1] for NHWC is for the scaling gamma. *@li offset: A 5D Tensor of type float32, -shape is [1, G, D, 1, 1] for NCHW, [1, 1, 1, G, D] for NHWC +shape is [1, G, 1, 1, 1] for NCHW, [1, 1, 1, G, 1] for NHWC for the scaling beta. *@li mean: A 5D Tensor of type float32, -shape is [N, G, D, 1, 1] for NCHW, [N, 1, 1, G, D] for NHWC +shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC for the updated mean. *@li variance: A 5D Tensor of type float32, -shape is [N, G, D, 1, 1] for NCHW, [N, 1, 1, G, D] for NHWC +shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC for the updated variance. 
diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index bbe203cd..c2e6f13a 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -1004,9 +1004,8 @@ REG_OP(StridedSliceAssign) * @par Inputs: * Two inputs, including: -* @li var: A mutable ND Tensor of type BasicType. -* @li input_value: A mutable ND "Tensor" of type BasicType. - +* @li var: A mutable ND Tensor of the following types:int32, int16, float16, float32. +* @li input_value: A mutable ND "Tensor" of the following types:int32, int16, float16, float32. * @par Attributes: * @li begin: A required list of ints. @@ -1030,9 +1029,9 @@ REG_OP(StridedSliceAssign) * @see StridedSlice() */ REG_OP(StridedSliceAssignD) - .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .INPUT(input_value, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .OUTPUT(var, TensorType(BasicType)) + .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16})) + .INPUT(input_value, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16})) + .OUTPUT(var, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT16})) .REQUIRED_ATTR(begin, ListInt) .REQUIRED_ATTR(end, ListInt) .REQUIRED_ATTR(strides, ListInt) @@ -1397,24 +1396,23 @@ REG_OP(UnsortedSegmentMin) * @brief Computes the minimum along segments of a tensor. * @par Inputs: -* Three inputs, including: -* @li x: A Tensor of type RealNumberType. -* @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix +* Two inputs, including: +* @li x: A Tensor of the following types:int32, int16, float16, float32. +* @li segment_ids: A 1D Tensor of type int32, whose shape is a prefix * of "x.shape". -* @li k: A Tensor. * @par Attributes: * num_segments: A required int32, specifying the number of distinct segment IDs. * @par Outputs: -* y: A Tensor of type RealNumberType. +* y: A Tensor.Must have the same type as input "x". 
* @see UnsortedSegmentProdD(), */ REG_OP(UnsortedSegmentMinD) - .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) .INPUT(segment_ids, TensorType({DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) .REQUIRED_ATTR(num_segments, Int) .OP_END_FACTORY_REG(UnsortedSegmentMinD) @@ -1447,24 +1445,23 @@ REG_OP(UnsortedSegmentProd) * @brief Computes the product along segments of a tensor. * @par Inputs: -* Three inputs, including: -* @li x: A Tensor of type RealNumberType. -* @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix +* Two inputs, including: +* @li x: A Tensor of the following types:int32, int16, float16, float32. +* @li segment_ids: A 1D Tensor of type int32, whose shape is a prefix * of "x.shape". -* @li k: A Tensor. * @par Attributes: * num_segments: An int32, specifying the number of distinct segment IDs. * @par Outputs: -* y: A Tensor of type RealNumberType. +* y: A Tensor.Must have the same type as input "x". 
* @see UnsortedSegmentMinD() */ REG_OP(UnsortedSegmentProdD) - .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) .INPUT(segment_ids, TensorType({DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) .REQUIRED_ATTR(num_segments, Int) .OP_END_FACTORY_REG(UnsortedSegmentProdD) @@ -1775,6 +1772,6 @@ REG_OP(CumulativeLogsumexpD) .ATTR(exclusive, Bool, false) .ATTR(reverse, Bool, false) .OP_END_FACTORY_REG(CumulativeLogsumexpD) - } // namespace ge + #endif // GE_OP_SELECTION_OPS_H diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 7b8a94f8..5bbf1e78 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -21,6 +21,35 @@ namespace ge { /** +*@brief This operation convert output dataType and shape + +*@par Inputs: +*The input handle must have the resource type. Inputs include: \n +*@li x:A list of Tensor objects. One or more tensors from which \n +the enqueued tensors should be taken. + +*@par Outputs: +*@li y:A list of Tensor objects. One or more tensors from which \n +the enqueued tensors should be taken. + +*@par Attributes: +*@li type: An optional ge::DataType. It refers to the target data type of outputs. + +*@par Third-party framework compatibility +*Compatible with tensorflow QueueIsClosed operator. 
+*/ + +REG_OP(Bitcast) + .INPUT(x, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, + DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32})) + .OUTPUT(y, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, + DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE, DT_COMPLEX64, + DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32})) + .REQUIRED_ATTR(type, Type) + .OP_END_FACTORY_REG(Bitcast) + +/** *@brief Convert tensor format from HWCN to C1HWNCoC0. *@par Inputs: diff --git a/third_party/fwkacllib/inc/register/op_tiling.h b/third_party/fwkacllib/inc/register/op_tiling.h index 92067a20..e9d19f94 100644 --- a/third_party/fwkacllib/inc/register/op_tiling.h +++ b/third_party/fwkacllib/inc/register/op_tiling.h @@ -21,6 +21,7 @@ #include "graph/debug/ge_attr_define.h" #include "graph/node.h" +#include #include #include #include @@ -81,10 +82,11 @@ struct TeOpParas { TeOpAttrArgs attrs; }; + using OpTilingFunc = std::function; + const nlohmann::json& , OpRunInfo&)>; -using OpTilingFuncPtr = bool(*)(const std::string&, const TeOpParas&, const std::string&, OpRunInfo&); +using OpTilingFuncPtr = bool(*)(const std::string&, const TeOpParas&, const nlohmann::json& , OpRunInfo&); class FMK_FUNC_HOST_VISIBILITY OpTilingInterf { @@ -124,6 +126,7 @@ inline size_t ByteBufferGetAll(ByteBuffer &buf, char *dest, size_t dest_len) extern "C" ge::graphStatus OpParaCalculate(const ge::Node &node, OpRunInfo &run_info); +extern "C" ge::graphStatus OpAtomicCalculate(const ge::Node &node, OpRunInfo &run_info); } diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 7539a549..2d6503f9 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -62,6 +62,10 @@ typedef enum tagRtError { RT_ERROR_DEVICE_POWER_DOWN_FAIL = 0x16, 
RT_ERROR_FEATURE_NOT_SUPPROT = 0x17, RT_ERROR_KERNEL_DUPLICATE = 0x18, // register same kernel repeatly + RT_ERROR_STREAM_DUPLICATE = 0x19, // streamId Map is repeatly + RT_ERROR_STREAM_NOT_EXIST = 0x1a, // streamId is not exist + RT_ERROR_SQ_NO_EXIST_SQ_TO_REUSE = 0x1b, // no exist sq to reuse + RT_ERROR_SQID_FULL = 0x3C, RT_ERROR_MODEL_STREAM_EXE_FAILED = 0x91, // the model stream failed RT_ERROR_MODEL_LOAD_FAILED = 0x94, // the model stream failed RT_ERROR_END_OF_SEQUENCE = 0x95, // end of sequence @@ -70,6 +74,8 @@ typedef enum tagRtError { RT_ERROR_CALLBACK_THREAD_UNSUBSTRIBE = 0x98, // callback thread unsubstribe RT_ERROR_DEBUG_REGISTER_FAILED = 0x99, // debug register fail RT_ERROR_DEBUG_UNREGISTER_FAILED = 0x9A, // debug unregister fail + RT_ERROR_GROUP_NOT_SET = 0x9B, + RT_ERROR_GROUP_NOT_CREATE = 0x9C, RT_ERROR_RESERVED } rtError_t; @@ -158,6 +164,12 @@ RTS_API rtError_t rtProfilerInit(const char *profdir, const char *address, const /** * @ingroup profiling_base + * @brief config rts profiler. + */ +RTS_API rtError_t rtProfilerConfig(uint16_t type); + +/** + * @ingroup profiling_base * @brief start rts profiler. 
*/ RTS_API rtError_t rtProfilerStart(void); diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index 3dad53c5..c64ed16f 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -61,6 +61,7 @@ typedef enum tagRtPlatformType { PLATFORM_MINI_V2, PLATFORM_LHISI_ES, PLATFORM_LHISI_CS, + PLATFORM_DC, PLATFORM_END, } rtPlatformType_t; diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index b059268e..70437b74 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -39,6 +39,17 @@ typedef enum tagCtxMode { RT_CTX_GEN_MODE = 1, } rtCtxMode_t; +typedef struct tagRtGroupInfo { + int32_t groupId; + int32_t flag; + uint32_t aicoreNum; + uint32_t aicpuNum; + uint32_t aivectorNum; + uint32_t sdmaNum; + uint32_t activeStreamNum; + void* extrPtr; +} rtGroupInfo_t; + /** * @ingroup rt_context * @brief create context and associates it with the calling thread @@ -100,14 +111,47 @@ RTS_API rtError_t rtCtxGetCurrent(rtContext_t *ctx); /** * @ingroup rt_context + * @brief returns the primary context of device. 
+ * @param [out] ctx returned context + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtGetPriCtxByDeviceId(int32_t device, rtContext_t *ctx); + +/** + * @ingroup rt_context * @brief returns the device ID for the current context * @param [out] device returned device id * @return RT_ERROR_NONE for ok */ RTS_API rtError_t rtCtxGetDevice(int32_t *device); +/** + * @ingroup + * @brief set group id + * @param [in] groupid + * @return RT_ERROR_NONE for ok, errno for failed + */ +RTS_API rtError_t rtSetGroup(int32_t groupId); + +/** + * @ingroup + * @brief get group info + * @param [in] groupid count + * @return RT_ERROR_NONE for ok, errno for failed + */ +RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t* groupInfo, uint32_t count); + +/** + * @ingroup + * @brief get group count + * @param [in] groupid count + * @return RT_ERROR_NONE for ok, errno for failed + */ +RTS_API rtError_t rtGetGroupCount(uint32_t *count); + #ifdef __cplusplus } #endif -#endif // __CCE_RUNTIME_CONTEXT_H__ \ No newline at end of file + +#endif // __CCE_RUNTIME_CONTEXT_H__ diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index 60928202..f79f060c 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -23,6 +23,9 @@ extern "C" { #endif +#define RT_CAPABILITY_SUPPORT (0x1) +#define RT_CAPABILITY_NOT_SUPPORT (0x0) + typedef struct tagRTDeviceInfo { uint8_t env_type; // 0: FPGA 1: EMU 2: ESL uint32_t ctrl_cpu_ip; @@ -54,6 +57,16 @@ typedef enum tagRtAicpuDeployType { AICPU_DEPLOY_RESERVED } rtAicpuDeployType_t; +typedef enum tagRtFeatureType { + FEATURE_TYPE_MEMCPY = 0, + FEATURE_TYPE_RSV +} rtFeatureType_t; + +typedef enum tagMemcpyInfo { + MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, + MEMCPY_INFO_RSV +} rtMemcpyInfo_t; + /** * @ingroup dvrt_dev * @brief get total device number. 
@@ -297,6 +310,24 @@ rtError_t rtGetSocVersion(char *version, const uint32_t maxLen); * @return RT_ERROR_NONE for ok */ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int32_t infoType, int64_t *value); + +/** + * @ingroup dvrt_dev + * @brief get capability infomation. + * @param [in] featureType feature type + typedef enum tagRtFeatureType { + FEATURE_TYPE_MEMCPY = 0, + FEATURE_TYPE_RSV, + } rtFeatureType_t; + * @param [in] infoType info type + typedef enum tagMemcpyInfo { + MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, + MEMCPY_INFO _RSV, + } rtMemcpyInfo_t; + * @param [out] value the capability info + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value); #ifdef __cplusplus } #endif diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index 7c2a0728..32514e1a 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -222,17 +222,15 @@ RTS_API rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag); * @return RT_ERROR_INVALID_DEVICE_POINTER for error device memory pointer */ RTS_API rtError_t rtMemFreeManaged(void *ptr); - /** * @ingroup dvrt_mem - * @brief advise memory - * @param [in] ptr memory pointer - * @param [in] size memory size - * @param [in] advise memory advise + * @brief alloc cached device memory + * @param [in| devPtr memory pointer + * @param [in] size memory size + * @param [in] type memory type * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_DEVICE_POINTER for error device memory pointer */ -RTS_API rtError_t rtMemAdvise(void *ptr, uint64_t size, uint32_t advise); +RTS_API rtError_t rtMallocCached(void **devPtr, uint64_t size, rtMemType_t type); /** * @ingroup dvrt_mem @@ -241,7 +239,7 @@ RTS_API rtError_t rtMemAdvise(void *ptr, uint64_t size, uint32_t advise); * @param [in] len memory size * @return RT_ERROR_NONE for ok, errno for 
failed */ -RTS_API rtError_t rtFlushCache(uint64_t base, uint32_t len); +RTS_API rtError_t rtFlushCache(void *base, size_t len); /** * @ingroup dvrt_mem @@ -250,7 +248,7 @@ RTS_API rtError_t rtFlushCache(uint64_t base, uint32_t len); * @param [in] len memory size * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t rtInvalidCache(uint64_t base, uint32_t len); +RTS_API rtError_t rtInvalidCache(void *base, size_t len); /** * @ingroup dvrt_mem @@ -428,19 +426,6 @@ RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqe_index, rtStream_t stre /** * @ingroup dvrt_mem - * @brief Set the memory readCount value - * @param [in] devPtr memory pointer - * @param [in] size memory size - * @param [in] readCount readCount value - * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input - * @return RT_ERROR_INVALID_RESOURCE_HANDLE for invalid resource handle - * @return RT_ERROR_DRV_ERR for driver error - */ -RTS_API rtError_t rtMemSetRC(const void *devPtr, uint64_t size, uint32_t readCount); - -/** - * @ingroup dvrt_mem * @brief Ipc set mem pid * @param [in] name name to be queried * @param [in] pid process id