From 5b1ccee3ee1fc1a0d0222d4882863fd3fb3e06c5 Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Thu, 2 Apr 2020 23:04:16 +0800 Subject: [PATCH] Update GraphEngine to synchronize with latest Ascend driver software suite --- CMakeLists.txt | 9 +- inc/common/dynamic_aipp.h | 0 inc/common/npu_error_define.h | 0 inc/common/opskernel/ge_task_info.h | 0 inc/common/opskernel/ops_kernel_info_types.h | 16 +- inc/common/optimizer/graph_optimizer_types.h | 0 inc/external/ge/ge_api.h | 10 + inc/external/ge/ge_api_types.h | 10 +- inc/external/graph/inference_context.h | 25 +- inc/external/graph/operator.h | 14 +- inc/external/graph/tensor.h | 11 +- inc/external/graph/types.h | 61 +- inc/external/register/register.h | 124 +--- inc/external/register/register_fmk_types.h | 6 - inc/framework/common/debug/ge_log.h | 2 + inc/framework/common/debug/log.h | 173 +++-- inc/framework/common/fmk_error_codes.h | 2 +- inc/framework/common/fmk_types.h | 4 +- inc/framework/common/ge_inner_error_codes.h | 76 +- inc/framework/common/ge_types.h | 3 +- inc/framework/common/gflags_util.h | 5 - inc/framework/common/helper/model_helper.h | 1 + inc/framework/common/helper/om_file_helper.h | 7 +- inc/framework/common/l2_cache_optimize.h | 11 +- inc/framework/common/op/attr_define.h | 806 ++++++++++++++++++++ inc/framework/common/op/attr_value_util.h | 8 +- inc/framework/common/op/ge_op_utils.h | 6 +- inc/framework/common/op/op_parser_util.h | 12 +- inc/framework/common/scope_guard.h | 2 +- inc/framework/common/types.h | 6 +- inc/framework/common/util.h | 187 +++-- inc/framework/dlog/log.h | 17 +- inc/framework/ge_runtime/task_info.h | 62 +- inc/framework/generator/generator_api.h | 1 + inc/framework/memory/memory_assigner.h | 2 +- inc/framework/omg/omg_inner_types.h | 27 +- inc/framework/omg/version.h | 0 inc/graph/compute_graph.h | 2 +- inc/graph/debug/ge_attr_define.h | 9 +- inc/graph/ge_context.h | 6 +- inc/graph/ge_global_options.h | 0 inc/graph/ge_local_context.h | 8 +- inc/graph/model.h | 5 +- inc/graph/node.h | 6 +- inc/{external => }/graph/usr_types.h | 1 - src/common/graph/anchor.cc | 11 +- src/common/graph/attr_value.cc | 1 - src/common/graph/buffer.cc | 1 - src/common/graph/compute_graph.cc | 70 +- src/common/graph/debug/ge_op_types.h | 5 +- src/common/graph/debug/ge_util.h | 283 +++---- src/common/graph/debug/graph_debug.cc | 16 +- src/common/graph/format_refiner.cc | 21 +- src/common/graph/format_refiner.h | 1 - src/common/graph/ge_attr_define.cc | 2 +- src/common/graph/ge_attr_value.cc | 72 +- src/common/graph/ge_tensor.cc | 125 ++-- src/common/graph/graph.cc | 1 - src/common/graph/inference_context.cc | 83 ++- src/common/graph/model.cc | 2 - src/common/graph/model_serialize.cc | 10 +- src/common/graph/node.cc | 12 +- src/common/graph/op_desc.cc | 35 +- src/common/graph/op_imp.cc | 15 +- src/common/graph/operator.cc | 134 ++-- src/common/graph/operator_factory_impl.cc | 2 +- src/common/graph/opsproto/opsproto_manager.cc | 2 + src/common/graph/option/ge_context.cc | 27 +- src/common/graph/option/ge_local_context.cc | 17 +- src/common/graph/shape_refiner.cc | 18 +- src/common/graph/tensor.cc | 75 +- src/common/graph/utils/anchor_utils.cc | 2 - src/common/graph/utils/ge_ir_utils.cc | 26 +- src/common/graph/utils/graph_utils.cc | 44 +- src/common/graph/utils/node_utils.cc | 8 +- src/common/graph/utils/op_desc_utils.cc | 36 +- src/common/graph/utils/string_utils.h | 1 - src/ge/CMakeLists.txt | 2 - src/ge/client/CMakeLists.txt | 4 - src/ge/client/ge_api.cc | 54 +- src/ge/common/CMakeLists.txt | 7 +- 
src/ge/common/auth/file_saver.cc | 19 +- src/ge/common/auth/file_saver.h | 48 +- src/ge/common/convert/pb2json.cc | 3 +- src/ge/common/convert/pb2json.h | 14 +- src/ge/common/debug/memory_dumper.cc | 34 +- .../formats/format_transfers/datatype_transfer.cc | 26 +- .../format_transfers/format_transfer_fractal_nz.cc | 28 +- .../format_transfers/format_transfer_fractal_z.cc | 27 +- .../format_transfers/format_transfer_fracz_hwcn.cc | 4 +- .../format_transfers/format_transfer_fracz_nchw.cc | 4 +- .../format_transfers/format_transfer_fracz_nhwc.cc | 4 +- src/ge/common/formats/formats.cc | 1 - src/ge/common/fp16_t.cc | 2 +- src/ge/common/fp16_t.h | 3 +- src/ge/common/ge/tbe_plugin_manager.cc | 131 ---- src/ge/common/ge/tbe_plugin_manager.h | 62 -- src/ge/common/helper/model_helper.cc | 9 +- src/ge/common/math_util.h | 17 +- src/ge/common/model_parser/base.h | 3 +- src/ge/common/model_saver.cc | 4 +- src/ge/common/op/attr_define.cc | 810 +++++++++++++++++++++ src/ge/common/op/ge_op_utils.cc | 470 +----------- src/ge/common/profiling/profiling_manager.cc | 16 +- src/ge/common/properties_manager.cc | 10 +- src/ge/common/thread_pool.h | 11 +- src/ge/common/types.cc | 10 +- src/ge/executor/CMakeLists.txt | 4 +- src/ge/executor/ge_executor.cc | 21 +- src/ge/ge_local_engine/CMakeLists.txt | 4 - src/ge/ge_local_engine/common/constant/constant.h | 6 +- src/ge/ge_local_engine/engine/ge_local_engine.cc | 2 - src/ge/ge_local_engine/engine/ge_local_engine.h | 93 ++- .../ops_kernel_store/ge_local_ops_kernel_info.cc | 18 +- .../ops_kernel_store/ge_local_ops_kernel_info.h | 94 +-- .../ops_kernel_store/op/ge_deleted_op.cc | 1 - .../ops_kernel_store/op/ge_deleted_op.h | 8 +- .../ge_local_engine/ops_kernel_store/op/no_op.cc | 2 - src/ge/ge_local_engine/ops_kernel_store/op/no_op.h | 8 +- src/ge/ge_local_engine/ops_kernel_store/op/op.cc | 4 +- src/ge/ge_local_engine/ops_kernel_store/op/op.h | 8 +- .../ops_kernel_store/op/op_factory.cc | 1 - .../ops_kernel_store/op/op_factory.h | 29 +- src/ge/ge_runtime/CMakeLists.txt | 3 - src/ge/ge_runtime/model_context.h | 10 +- src/ge/ge_runtime/model_runner.cc | 12 +- src/ge/ge_runtime/op_info_utils.cc | 632 ---------------- src/ge/ge_runtime/op_info_utils.h | 81 --- src/ge/ge_runtime/output.cc | 62 +- src/ge/ge_runtime/output.h | 3 +- src/ge/ge_runtime/runtime_model.cc | 391 +--------- src/ge/ge_runtime/runtime_model.h | 19 +- src/ge/ge_runtime/task/aicpu_task.cc | 8 +- src/ge/ge_runtime/task/aicpu_task.h | 1 - src/ge/ge_runtime/task/cce_task.cc | 21 +- src/ge/ge_runtime/task/cce_task.h | 1 - src/ge/ge_runtime/task/event_record_task.cc | 7 +- src/ge/ge_runtime/task/event_record_task.h | 3 +- src/ge/ge_runtime/task/event_wait_task.cc | 3 +- src/ge/ge_runtime/task/event_wait_task.h | 3 +- src/ge/ge_runtime/task/hccl_task.cc | 23 +- src/ge/ge_runtime/task/hccl_task.h | 1 - src/ge/ge_runtime/task/memcpy_async_task.cc | 6 +- src/ge/ge_runtime/task/memcpy_async_task.h | 1 - src/ge/ge_runtime/task/profiler_task.cc | 4 +- src/ge/ge_runtime/task/profiler_task.h | 1 - src/ge/ge_runtime/task/stream_active_task.cc | 2 +- src/ge/ge_runtime/task/stream_active_task.h | 1 - src/ge/ge_runtime/task/stream_switch_task.cc | 3 +- src/ge/ge_runtime/task/stream_switch_task.h | 1 - src/ge/ge_runtime/task/task.h | 1 - src/ge/ge_runtime/task/task_factory.h | 14 +- src/ge/ge_runtime/task/tbe_task.cc | 4 +- src/ge/ge_runtime/task/tbe_task.h | 1 - src/ge/generator/ge_generator.cc | 29 +- src/ge/generator/generator_api.cc | 222 +++--- src/ge/graph/build/graph_build.cc | 12 +- src/ge/graph/build/graph_build.h 
| 11 +- src/ge/graph/build/logical_stream_allocator.cc | 15 +- src/ge/graph/build/memory/CMakeLists.txt | 1 - .../build/memory/binary_block_mem_assigner.cc | 4 +- .../graph/build/memory/binary_block_mem_assigner.h | 1 - src/ge/graph/build/memory/block_mem_assigner.cc | 64 +- src/ge/graph/build/memory/block_mem_assigner.h | 14 +- src/ge/graph/build/memory/graph_mem_assigner.cc | 31 +- src/ge/graph/build/memory/graph_mem_assigner.h | 1 - src/ge/graph/build/memory/hybrid_mem_assigner.cc | 2 - src/ge/graph/build/memory/hybrid_mem_assigner.h | 2 +- .../graph/build/memory/max_block_mem_assigner.cc | 1 - src/ge/graph/build/memory/max_block_mem_assigner.h | 2 - src/ge/graph/build/memory/mem_assigner.h | 1 - src/ge/graph/build/memory/memory_assigner.cc | 4 +- src/ge/graph/build/memory/var_mem_assign_util.cc | 43 +- src/ge/graph/build/memory/var_mem_assign_util.h | 2 - src/ge/graph/build/model_builder.cc | 77 +- src/ge/graph/build/model_builder.h | 1 - src/ge/graph/build/optimize_stream_graph.cc | 18 +- src/ge/graph/build/optimize_stream_graph.h | 1 - src/ge/graph/build/run_context.cc | 3 +- src/ge/graph/build/run_context.h | 4 +- src/ge/graph/build/stream_allocator.cc | 25 +- src/ge/graph/build/stream_allocator.h | 1 - src/ge/graph/build/task_generator.cc | 27 +- src/ge/graph/build/task_generator.h | 1 - src/ge/graph/common/omg_util.cc | 1 + src/ge/graph/execute/graph_execute.cc | 9 +- src/ge/graph/load/new_model_manager/data_dumper.cc | 102 ++- src/ge/graph/load/new_model_manager/data_dumper.h | 41 +- .../graph/load/new_model_manager/davinci_model.cc | 327 +++++---- .../graph/load/new_model_manager/davinci_model.h | 28 +- .../graph/load/new_model_manager/model_manager.cc | 64 +- .../graph/load/new_model_manager/model_manager.h | 12 +- src/ge/graph/load/new_model_manager/model_utils.h | 2 - .../new_model_manager/task_info/hccl_task_info.cc | 12 +- .../task_info/kernel_ex_task_info.cc | 30 +- .../task_info/kernel_task_info.cc | 134 ++-- .../new_model_manager/task_info/kernel_task_info.h | 5 +- src/ge/graph/load/output/output.h | 4 +- src/ge/graph/manager/graph_manager.cc | 79 +- src/ge/graph/manager/graph_manager.h | 7 +- src/ge/graph/manager/graph_manager_utils.cc | 31 +- src/ge/graph/manager/graph_manager_utils.h | 7 +- src/ge/graph/manager/graph_var_manager.cc | 31 +- src/ge/graph/manager/graph_var_manager.h | 3 +- src/ge/graph/manager/util/debug.cc | 13 +- src/ge/graph/manager/util/debug.h | 0 src/ge/graph/optimize/graph_optimize.cc | 30 +- src/ge/graph/optimize/graph_optimize.h | 5 +- src/ge/graph/optimize/summary_optimize.cc | 2 +- src/ge/graph/partition/engine_place.cc | 5 +- src/ge/graph/partition/graph_partition.cc | 30 +- src/ge/graph/partition/graph_partition.h | 7 +- src/ge/graph/passes/aicpu_constant_folding_pass.cc | 12 +- src/ge/graph/passes/aicpu_constant_folding_pass.h | 1 - src/ge/graph/passes/assert_pass.h | 6 +- src/ge/graph/passes/atomic_addr_clean_pass.cc | 10 +- src/ge/graph/passes/atomic_addr_clean_pass.h | 63 +- src/ge/graph/passes/compile_nodes_pass.cc | 153 ++-- src/ge/graph/passes/compile_nodes_pass.h | 10 +- src/ge/graph/passes/constant_fuse_same_pass.cc | 4 +- src/ge/graph/passes/control_op_attr_pass.cc | 1 - src/ge/graph/passes/control_trigger_pass.h | 12 +- src/ge/graph/passes/flow_ctrl_pass.cc | 57 +- src/ge/graph/passes/flow_ctrl_pass.h | 13 +- src/ge/graph/passes/folding_kernel/add_kernel.cc | 1 + src/ge/graph/passes/folding_kernel/add_kernel.h | 11 +- .../passes/folding_kernel/concat_offset_kernel.cc | 2 +- .../graph/passes/folding_kernel/floordiv_kernel.h | 2 +- 
.../passes/folding_kernel/gather_v2_kernel.cc | 14 +- src/ge/graph/passes/folding_kernel/mul_kernel.cc | 17 +- src/ge/graph/passes/folding_kernel/pack_kernel.cc | 9 +- .../passes/folding_kernel/reduce_prod_kernel.cc | 12 +- .../passes/folding_kernel/reduce_prod_kernel.h | 2 +- .../graph/passes/folding_kernel/reshape_kernel.h | 8 +- .../graph/passes/folding_kernel/squeeze_kernel.h | 9 +- .../passes/folding_kernel/ssd_prior_box_kernel.cc | 4 +- .../passes/folding_kernel/ssd_prior_box_kernel.h | 130 ++-- src/ge/graph/passes/folding_pass.cc | 5 +- src/ge/graph/passes/get_original_format_pass.cc | 8 +- src/ge/graph/passes/hccl_memcpy_pass.cc | 4 +- src/ge/graph/passes/isolated_op_remove_pass.cc | 1 - src/ge/graph/passes/iterator_op_pass.cc | 34 +- src/ge/graph/passes/link_gen_mask_nodes_pass.cc | 4 +- src/ge/graph/passes/link_gen_mask_nodes_pass.h | 4 - src/ge/graph/passes/multi_batch_pass.h | 2 +- src/ge/graph/passes/net_output_pass.cc | 11 +- src/ge/graph/passes/no_reshape_op_remove_pass.cc | 14 +- src/ge/graph/passes/pass_manager.cc | 3 +- src/ge/graph/passes/pass_utils.cc | 12 +- src/ge/graph/passes/print_op_pass.h | 1 + .../passes/same_transdata_breadth_fusion_pass.cc | 24 +- .../passes/same_transdata_breadth_fusion_pass.h | 22 +- .../graph/passes/shape_operate_op_remove_pass.cc | 2 +- src/ge/graph/passes/switch_op_pass.cc | 8 +- src/ge/graph/passes/transop_depth_fusion_pass.cc | 38 +- .../passes/transop_without_reshape_fusion_pass.h | 4 +- src/ge/graph/passes/transpose_transdata_pass.cc | 17 +- src/ge/graph/passes/unused_op_remove_pass.cc | 12 +- src/ge/graph/passes/update_net_output_pass.cc | 17 +- src/ge/graph/passes/var_is_initialized_op_pass.cc | 25 +- src/ge/graph/passes/variable_format_pass.cc | 9 +- src/ge/graph/passes/variable_op_pass.cc | 50 +- src/ge/graph/passes/variable_prepare_op_pass.cc | 12 +- src/ge/graph/passes/variable_ref_delete_op_pass.cc | 6 +- src/ge/graph/preprocess/graph_preprocess.cc | 55 +- src/ge/graph/preprocess/graph_preprocess.h | 10 +- .../graph/preprocess/insert_op/base_insert_op.cc | 32 +- src/ge/graph/preprocess/insert_op/base_insert_op.h | 2 - src/ge/graph/preprocess/insert_op/ge_aipp_op.cc | 36 +- src/ge/graph/preprocess/insert_op/ge_aipp_op.h | 9 +- .../preprocess/insert_op/util_insert_aipp_op.cc | 69 +- .../preprocess/insert_op/util_insert_aipp_op.h | 9 +- src/ge/graph/preprocess/multi_batch_copy_graph.cc | 44 +- src/ge/graph/preprocess/multi_batch_copy_graph.h | 33 +- src/ge/init/gelib.cc | 17 +- src/ge/init/gelib.h | 4 +- src/ge/model/ge_model.cc | 6 +- src/ge/model/ge_model.h | 14 +- src/ge/opskernel_manager/ops_kernel_manager.cc | 17 +- src/ge/session/inner_session.cc | 55 +- src/ge/session/inner_session.h | 10 +- src/ge/session/session_manager.cc | 10 +- src/ge/session/session_manager.h | 19 +- src/proto/CMakeLists.txt | 66 -- src/proto/op_mapping_info.proto | 13 +- .../testcase/ge_graph/ge_operator_unittest.cc | 103 --- .../build/logical_stream_allocator_unittest.cc | 4 +- tests/ut/ge/graph/build/mem_assigner_unittest.cc | 1 - .../new_model_manager_davinci_model_unittest.cc | 1 - tests/ut/ge/graph/passes/base_pass_unittest.cc | 1 - .../ut/ge/graph/passes/flow_ctrl_pass_unittest.cc | 0 .../broadcast_gradient_args_kernel_unittest.cc | 6 +- .../passes/folding_kernel/empty_kernel_unittest.cc | 2 +- .../folding_kernel/expanddims_kernel_unittest.cc | 0 .../passes/folding_kernel/pack_kernel_unittest.cc | 2 +- .../passes/folding_kernel/size_kernel_unittest.cc | 2 +- .../strided_slice_kernel_unittest.cc | 1 + 
tests/ut/ge/graph/passes/merge_pass_unittest.cc | 0 tests/ut/ge/graph/passes/print_op_pass_unittest.cc | 1 - tests/ut/ge/graph/passes/snapshot_pass_unittest.cc | 0 .../passes/update_net_output_pass_unittest.cc | 1 - .../ge/profiling/ge_profiling_manager_unittest.cc | 2 +- .../ut/ge/single_op/single_op_manager_unittest.cc | 0 tests/ut/ge/single_op/single_op_model_unittest.cc | 0 .../fwkacllib/inc/aicpu/common/aicpu_task_struct.h | 16 +- third_party/fwkacllib/inc/cce/aicpu_engine.h | 6 +- .../fwkacllib/inc/cce/aicpu_engine_struct.h | 6 +- third_party/fwkacllib/inc/cce/blas_struct.h | 6 +- third_party/fwkacllib/inc/cce/cce.h | 6 +- third_party/fwkacllib/inc/cce/cce_def.hpp | 6 +- third_party/fwkacllib/inc/cce/common/attr_list.hpp | 7 +- third_party/fwkacllib/inc/cce/common/catch.hpp | 6 +- third_party/fwkacllib/inc/cce/compiler_stub.h | 6 +- third_party/fwkacllib/inc/cce/customize.h | 7 +- third_party/fwkacllib/inc/cce/dnn.h | 6 +- third_party/fwkacllib/inc/cce/dnn_base.h | 8 +- third_party/fwkacllib/inc/cce/dnn_base_def.hpp | 19 +- third_party/fwkacllib/inc/cce/dnn_op.h | 8 +- third_party/fwkacllib/inc/cce/dnn_struct.hpp | 6 +- third_party/fwkacllib/inc/cce/dnn_struct_base.hpp | 6 +- third_party/fwkacllib/inc/cce/fwk_adpt_struct.h | 6 +- third_party/fwkacllib/inc/cce/l2fusion_struct.hpp | 6 +- .../fwkacllib/inc/cce/optimizer/fusion_engine.h | 6 +- third_party/fwkacllib/inc/cce/taskdown_api.h | 7 +- third_party/fwkacllib/inc/cce/taskdown_common.hpp | 6 +- third_party/fwkacllib/inc/hccl/base.h | 113 +-- third_party/fwkacllib/inc/hccl/hcom.h | 220 +++++- third_party/fwkacllib/inc/mmpa/mmpa_api.h | 6 +- .../fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h | 96 ++- .../inc/mmpa/sub_inc/mmpa_typedef_linux.h | 10 +- .../fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h | 10 +- third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h | 57 +- third_party/fwkacllib/inc/ops/aipp.h | 0 third_party/fwkacllib/inc/ops/aipp_data.h | 0 third_party/fwkacllib/inc/ops/all_ops.h | 3 - third_party/fwkacllib/inc/ops/array_ops.h | 0 third_party/fwkacllib/inc/ops/audio_ops.h | 0 third_party/fwkacllib/inc/ops/basic_lstm_cell.h | 0 third_party/fwkacllib/inc/ops/batch_ops.h | 0 third_party/fwkacllib/inc/ops/bitwise_ops.h | 0 third_party/fwkacllib/inc/ops/boosted_trees_ops.h | 0 .../fwkacllib/inc/ops/candidate_sampling_ops.h | 0 third_party/fwkacllib/inc/ops/clip_boxes.h | 34 - third_party/fwkacllib/inc/ops/control_flow_ops.h | 0 third_party/fwkacllib/inc/ops/data_flow_ops.h | 0 third_party/fwkacllib/inc/ops/decode_bbox.h | 33 - .../fwkacllib/inc/ops/decode_boundaries_target.h | 31 - .../inc/ops/decode_cornerpoints_target_bg.h | 31 - .../ops/decode_cornerpoints_target_wrt_center_v1.h | 32 - .../fwkacllib/inc/ops/decode_wheels_target.h | 31 - third_party/fwkacllib/inc/ops/dvpp_ops.h | 0 .../fwkacllib/inc/ops/elewise_calculation_ops.h | 360 ++++++++- .../fwkacllib/inc/ops/fsrdetectionoutput_ops.h | 39 - third_party/fwkacllib/inc/ops/functional_ops.h | 0 third_party/fwkacllib/inc/ops/get_data_ops.h | 0 third_party/fwkacllib/inc/ops/hcom_ops.h | 391 +++++----- third_party/fwkacllib/inc/ops/linalg_ops.h | 0 third_party/fwkacllib/inc/ops/logging_ops.h | 5 + third_party/fwkacllib/inc/ops/lookup_ops.h | 0 third_party/fwkacllib/inc/ops/math_ops.h | 0 .../fwkacllib/inc/ops/matrix_calculation_ops.h | 71 +- third_party/fwkacllib/inc/ops/mvn_ops.h | 51 -- third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h | 52 ++ third_party/fwkacllib/inc/ops/nn_calculation_ops.h | 198 +++-- third_party/fwkacllib/inc/ops/nn_detect_ops.h | 0 
third_party/fwkacllib/inc/ops/nn_norm_ops.h | 36 + third_party/fwkacllib/inc/ops/nn_ops.h | 0 third_party/fwkacllib/inc/ops/nn_other_ops.h | 136 ++++ third_party/fwkacllib/inc/ops/nn_pooling_ops.h | 58 +- third_party/fwkacllib/inc/ops/nn_training_ops.h | 137 +++- third_party/fwkacllib/inc/ops/no_op.h | 0 third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h | 18 +- third_party/fwkacllib/inc/ops/normalize_ops.h | 51 -- third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h | 0 third_party/fwkacllib/inc/ops/outfeed_ops.h | 0 third_party/fwkacllib/inc/ops/pad_ops.h | 29 + third_party/fwkacllib/inc/ops/power_ops.h | 34 - third_party/fwkacllib/inc/ops/prior_box.h | 129 ---- third_party/fwkacllib/inc/ops/random_ops.h | 15 + third_party/fwkacllib/inc/ops/reduce_ops.h | 27 + third_party/fwkacllib/inc/ops/reduction_ops.h | 33 - third_party/fwkacllib/inc/ops/roipooling_ops.h | 37 - third_party/fwkacllib/inc/ops/rpn_ops.h | 0 third_party/fwkacllib/inc/ops/save_ops.h | 2 +- third_party/fwkacllib/inc/ops/scale_ops.h | 37 - third_party/fwkacllib/inc/ops/selection_ops.h | 435 +---------- third_party/fwkacllib/inc/ops/set_ops.h | 0 .../fwkacllib/inc/ops/shuffle_channel_ops.h | 31 - third_party/fwkacllib/inc/ops/sparse_ops.h | 0 .../fwkacllib/inc/ops/split_combination_ops.h | 0 third_party/fwkacllib/inc/ops/state_ops.h | 0 .../fwkacllib/inc/ops/stateless_random_ops.h | 2 +- third_party/fwkacllib/inc/ops/threshold_ops.h | 33 - third_party/fwkacllib/inc/ops/transformation_ops.h | 63 +- third_party/fwkacllib/inc/register/op_registry.h | 75 +- third_party/fwkacllib/inc/runtime/base.h | 9 +- third_party/fwkacllib/inc/runtime/config.h | 6 +- third_party/fwkacllib/inc/runtime/context.h | 2 +- third_party/fwkacllib/inc/runtime/dev.h | 2 +- third_party/fwkacllib/inc/runtime/dvfsprofile.h | 2 +- third_party/fwkacllib/inc/runtime/event.h | 2 +- third_party/fwkacllib/inc/runtime/kernel.h | 6 +- third_party/fwkacllib/inc/runtime/mem.h | 4 +- third_party/fwkacllib/inc/runtime/rt_model.h | 5 +- third_party/fwkacllib/inc/runtime/stream.h | 2 +- third_party/fwkacllib/inc/tdt/data_common.h | 2 +- third_party/fwkacllib/inc/tdt/status.h | 76 +- third_party/fwkacllib/inc/tdt/tdt_host_interface.h | 83 ++- third_party/fwkacllib/inc/tdt/tsd_client.h | 58 +- third_party/fwkacllib/inc/toolchain/prof_engine.h | 20 +- .../fwkacllib/inc/toolchain/prof_mgr_core.h | 15 +- .../fwkacllib/inc/toolchain/prof_reporter.h | 19 +- third_party/fwkacllib/inc/toolchain/slog.h | 12 +- third_party/fwkacllib/version.info | 2 +- third_party/prebuild/x86_64/libslog.so | Bin 89288 -> 89288 bytes 415 files changed, 6728 insertions(+), 6621 deletions(-) mode change 100755 => 100644 inc/common/dynamic_aipp.h mode change 100755 => 100644 inc/common/npu_error_define.h mode change 100755 => 100644 inc/common/opskernel/ge_task_info.h mode change 100755 => 100644 inc/common/optimizer/graph_optimizer_types.h mode change 100755 => 100644 inc/external/graph/types.h mode change 100755 => 100644 inc/framework/common/ge_types.h mode change 100755 => 100644 inc/framework/common/helper/model_helper.h mode change 100755 => 100644 inc/framework/common/helper/om_file_helper.h create mode 100644 inc/framework/common/op/attr_define.h mode change 100755 => 100644 inc/framework/common/types.h mode change 100755 => 100644 inc/framework/omg/version.h mode change 100755 => 100644 inc/graph/compute_graph.h mode change 100755 => 100644 inc/graph/ge_global_options.h mode change 100755 => 100644 inc/graph/model.h rename inc/{external => }/graph/usr_types.h (99%) mode change 100755 => 100644 
src/common/graph/compute_graph.cc mode change 100755 => 100644 src/ge/common/debug/memory_dumper.cc delete mode 100644 src/ge/common/ge/tbe_plugin_manager.cc delete mode 100644 src/ge/common/ge/tbe_plugin_manager.h mode change 100755 => 100644 src/ge/common/model_saver.cc create mode 100644 src/ge/common/op/attr_define.cc delete mode 100644 src/ge/ge_runtime/op_info_utils.cc delete mode 100644 src/ge/ge_runtime/op_info_utils.h mode change 100755 => 100644 src/ge/ge_runtime/task/hccl_task.cc mode change 100755 => 100644 src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc mode change 100755 => 100644 src/ge/graph/manager/util/debug.cc mode change 100755 => 100644 src/ge/graph/manager/util/debug.h mode change 100755 => 100644 src/ge/opskernel_manager/ops_kernel_manager.cc delete mode 100644 src/proto/CMakeLists.txt mode change 100644 => 100755 tests/ut/ge/graph/passes/flow_ctrl_pass_unittest.cc mode change 100644 => 100755 tests/ut/ge/graph/passes/folding_kernel/expanddims_kernel_unittest.cc mode change 100644 => 100755 tests/ut/ge/graph/passes/merge_pass_unittest.cc mode change 100644 => 100755 tests/ut/ge/graph/passes/snapshot_pass_unittest.cc mode change 100644 => 100755 tests/ut/ge/single_op/single_op_manager_unittest.cc mode change 100644 => 100755 tests/ut/ge/single_op/single_op_model_unittest.cc mode change 100644 => 100755 third_party/fwkacllib/inc/cce/cce_def.hpp mode change 100644 => 100755 third_party/fwkacllib/inc/cce/common/attr_list.hpp mode change 100644 => 100755 third_party/fwkacllib/inc/cce/common/catch.hpp mode change 100644 => 100755 third_party/fwkacllib/inc/cce/dnn_base_def.hpp mode change 100644 => 100755 third_party/fwkacllib/inc/cce/dnn_struct.hpp mode change 100644 => 100755 third_party/fwkacllib/inc/cce/dnn_struct_base.hpp mode change 100644 => 100755 third_party/fwkacllib/inc/cce/l2fusion_struct.hpp mode change 100644 => 100755 third_party/fwkacllib/inc/cce/taskdown_common.hpp mode change 100755 => 100644 third_party/fwkacllib/inc/ops/aipp.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/aipp_data.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/all_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/array_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/audio_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/basic_lstm_cell.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/batch_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/bitwise_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/boosted_trees_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/candidate_sampling_ops.h delete mode 100644 third_party/fwkacllib/inc/ops/clip_boxes.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/control_flow_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/data_flow_ops.h delete mode 100644 third_party/fwkacllib/inc/ops/decode_bbox.h delete mode 100644 third_party/fwkacllib/inc/ops/decode_boundaries_target.h delete mode 100755 third_party/fwkacllib/inc/ops/decode_cornerpoints_target_bg.h delete mode 100755 third_party/fwkacllib/inc/ops/decode_cornerpoints_target_wrt_center_v1.h delete mode 100644 third_party/fwkacllib/inc/ops/decode_wheels_target.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/dvpp_ops.h delete mode 100755 third_party/fwkacllib/inc/ops/fsrdetectionoutput_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/functional_ops.h mode change 100755 => 100644 
third_party/fwkacllib/inc/ops/get_data_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/linalg_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/lookup_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/math_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/matrix_calculation_ops.h delete mode 100755 third_party/fwkacllib/inc/ops/mvn_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/nn_calculation_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/nn_detect_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/nn_norm_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/nn_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/nn_other_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/no_op.h delete mode 100644 third_party/fwkacllib/inc/ops/normalize_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/outfeed_ops.h delete mode 100644 third_party/fwkacllib/inc/ops/power_ops.h delete mode 100644 third_party/fwkacllib/inc/ops/prior_box.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/random_ops.h delete mode 100755 third_party/fwkacllib/inc/ops/reduction_ops.h delete mode 100755 third_party/fwkacllib/inc/ops/roipooling_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/rpn_ops.h delete mode 100755 third_party/fwkacllib/inc/ops/scale_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/set_ops.h delete mode 100755 third_party/fwkacllib/inc/ops/shuffle_channel_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/sparse_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/split_combination_ops.h mode change 100755 => 100644 third_party/fwkacllib/inc/ops/state_ops.h delete mode 100755 third_party/fwkacllib/inc/ops/threshold_ops.h diff --git a/CMakeLists.txt b/CMakeLists.txt index a75912fa..358334c2 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,17 +75,16 @@ elseif(DEFINED ENV{D_LINK_PATH}) find_library(resource libresource.so ${GE_LIB_PATH}) else() # Ascend mode - set(HIAI_INSTALLED_DIR /usr/local/HiAI) - set(HIAI_DRIVER_DIR ${HIAI_INSTALLED_DIR}/driver/lib64) - set(HIAI_RUNTIME_DIR ${HIAI_INSTALLED_DIR}/runtime/lib64) + set(HIAI_INSTALLED_DIR /usr/local/Ascend) + set(HIAI_DRIVER_DIR ${HIAI_INSTALLED_DIR}/driver/lib64/common) + set(HIAI_RUNTIME_DIR ${HIAI_INSTALLED_DIR}/fwkacllib/lib64) find_library(c_sec libc_sec.so ${HIAI_DRIVER_DIR}) find_library(slog libslog.so ${HIAI_DRIVER_DIR}) find_library(mmpa libmmpa.so ${HIAI_DRIVER_DIR}) + find_library(msprof libmsprof.so ${HIAI_DRIVER_DIR}) - find_library(cce libcce.so ${HIAI_RUNTIME_DIR}) find_library(hccl libhccl.so ${HIAI_RUNTIME_DIR}) find_library(runtime libruntime.so ${HIAI_RUNTIME_DIR}) - find_library(msprof libmsprof.so ${HIAI_RUNTIME_DIR}) find_library(register libregister.so ${HIAI_RUNTIME_DIR}) find_library(resource libresource.so ${HIAI_RUNTIME_DIR}) endif() diff --git a/inc/common/dynamic_aipp.h b/inc/common/dynamic_aipp.h old mode 100755 new mode 100644 diff --git a/inc/common/npu_error_define.h b/inc/common/npu_error_define.h old mode 100755 new mode 100644 diff --git a/inc/common/opskernel/ge_task_info.h b/inc/common/opskernel/ge_task_info.h old mode 100755 new mode 100644 diff --git a/inc/common/opskernel/ops_kernel_info_types.h 
b/inc/common/opskernel/ops_kernel_info_types.h index 75e437af..19a738de 100644 --- a/inc/common/opskernel/ops_kernel_info_types.h +++ b/inc/common/opskernel/ops_kernel_info_types.h @@ -18,16 +18,15 @@ #define INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_TYPES_H_ #include - #include #include - #include "graph/buffer.h" #include "runtime/rt_model.h" using std::string; namespace ge { +/*lint -e148*/ struct RunContext { rtModel_t model; rtStream_t stream; @@ -37,10 +36,12 @@ struct RunContext { uint64_t weightMemSize; uint8_t *weightMemBase; ge::Buffer weightsBuffer; - std::vector graphStreamList; // all streams of graph which are sort by ge stream id(0,1,...) - std::vector graphEventList; // all events of graph which are sort by ge event id(0,1,...) + std::vector graphStreamList; // all streams of graph, order by ge stream id(0,1,...) + std::vector graphEventList; // all events of graph, order by ge event id(0,1,...) }; +/*lint +e148*/ + struct Task { uint32_t id; uint16_t type; @@ -49,10 +50,11 @@ }; struct OpInfo { - string engine; // engine name - string opKernelLib; // opsKernelStore name + string engine; // which engine + /*lint -e148*/ + string opKernelLib; // which opsKernelStore int computeCost; // compute cost - bool flagPartial; // whether to support related shape + bool flagPartial; // whether support is related to shape bool flagAsync; // Whether to support asynchronous bool isAtomic; // whether to support atomic addr clean string opFileName; // op file name diff --git a/inc/common/optimizer/graph_optimizer_types.h b/inc/common/optimizer/graph_optimizer_types.h old mode 100755 new mode 100644 diff --git a/inc/external/ge/ge_api.h b/inc/external/ge/ge_api.h index 6e49df97..e9beae6f 100644 --- a/inc/external/ge/ge_api.h +++ b/inc/external/ge/ge_api.h @@ -50,6 +50,16 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Session { Status AddGraph(uint32_t graphId, const Graph &graph); /// + /// @ingroup client + /// @brief add a graph with a specific graphId and graphOptions + /// @param [in] graphId graph id + /// @param [in] graph the graph + /// @param [in] options graph options + /// @return Status result of function + /// + Status AddGraph(uint32_t graphId, const Graph &graph, const std::map &options); + + /// /// @ingroup ge_graph /// @brief remove a graph of the session with specific session id /// @param [in] graphId graph id diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index cdecd987..7aad64b6 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -50,7 +50,7 @@ const char *const VARIABLE_MEMORY_MAX_SIZE = "ge.variableMemoryMaxSize"; // its value should be int32_t type, default value is "1" const std::string STREAM_NUM = "ge.streamNum"; -// Configure add head stream to model, +// Configure add head stream to model.
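The AddGraph overload added to ge_api.h above accepts per-graph options alongside the graph itself. A minimal usage sketch, assuming the options map is std::map<std::string, std::string> (the template arguments were stripped from the declaration above) and using the "ge.streamNum" key declared in ge_api_types.h; the graph id and option value are illustrative:

  #include <map>
  #include <string>
  #include "ge/ge_api.h"

  ge::Status AddGraphWithOptions(ge::Session &session, const ge::Graph &graph) {
    // Per-graph options; the key comes from ge_api_types.h, the value is illustrative.
    std::map<std::string, std::string> options = {{"ge.streamNum", "1"}};
    return session.AddGraph(1, graph, options);  // graphId 1 is arbitrary here
  }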
// its value should be "0" or "1", default value is "0" const std::string HEAD_STREAM = "ge.headStream"; @@ -138,7 +138,7 @@ const std::string GE_FE_FLAG = "ge.feFlag"; // this option is to obtain stream max parallel num const std::string STREAM_MAX_PARALLEL_NUM = "ge.streamMaxParallelNum"; -// configure outputDatatype to setting net output type +// configure outputDatatype to setting net output type const std::string OUTPUT_DATATYPE = "ge.outputDatatype"; // configure whether to enable hcom parallel by session constructor options param, @@ -149,7 +149,7 @@ const std::string HCOM_PARALLEL = "ge.hcomParallel"; // example: GA|RL, support configure multiple, split by | const std::string AUTO_TUNE_MODE = "ge.autoTuneMode"; -// Configure core type "VectorEngine", default value is "AICoreEngine" +// Configure core type "VectorEngine", default value is "AIcoreEngine" const std::string CORE_TYPE = "ge.engineType"; // Configure soc version , example: "Ascend310" @@ -165,6 +165,10 @@ const char *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum"; const char *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize"; const char *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum"; +// Configure for print op pass +// Its value should be "0" or "1", default value is "1" +const char *const ENABLE_PRINT_OP_PASS = "ge.enablePrintOpPass"; + // Graph run mode enum GraphRunMode { PREDICTION = 0, TRAIN }; diff --git a/inc/external/graph/inference_context.h b/inc/external/graph/inference_context.h index 993a4bf4..eb8fae3d 100644 --- a/inc/external/graph/inference_context.h +++ b/inc/external/graph/inference_context.h @@ -28,29 +28,29 @@ namespace ge { class InferenceContext; using InferenceContextPtr = std::shared_ptr; +class ShapeAndTypeImpl; class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ShapeAndType { public: - ShapeAndType() = default; + ShapeAndType(); ~ShapeAndType() = default; - ShapeAndType(const Shape &shape, DataType data_type); + ShapeAndType(const Shape &shape, DataType dataType); void SetShape(const Shape &shape); - void SetType(DataType data_type); + void SetType(DataType dataType); - const Shape &GetShape() const; + Shape GetShape() const; DataType GetDataType() const; private: - Shape shape_; - DataType data_type_ = DT_UNDEFINED; + std::shared_ptr shape_and_type_impl_; }; +class InferenceContextImpl; class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY InferenceContext { public: - InferenceContext() = default; ~InferenceContext() = default; InferenceContext(const InferenceContext &context) = delete; InferenceContext(const InferenceContext &&context) = delete; @@ -58,22 +58,19 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY InferenceContext { InferenceContext &operator=(const InferenceContext &&context) = delete; void SetInputHandleShapesAndTypes(std::vector> &&shapes_and_types); - const std::vector> &GetInputHandleShapesAndTypes() const; - const std::vector> &GetOutputHandleShapesAndTypes() const; - void SetOutputHandleShapesAndTypes(const std::vector> &shapes_and_types); void SetOutputHandleShapesAndTypes(std::vector> &&shapes_and_types); void SetMarks(const std::vector &marks); const std::vector &GetMarks() const; + static std::unique_ptr Create(); + private: - // For deliver to op in pair, help to support dynamic shape - std::vector marks_; - std::vector> input_handle_shapes_and_types_; - std::vector> output_handle_shapes_and_types_; + InferenceContext(std::unique_ptr &impl); + std::shared_ptr inference_context_impl_; }; } // namespace ge #endif //
INC_EXTERNAL_GRAPH_INFERENCE_CONTEXT_H_ diff --git a/inc/external/graph/operator.h b/inc/external/graph/operator.h index 4b84f074..0ffa7313 100644 --- a/inc/external/graph/operator.h +++ b/inc/external/graph/operator.h @@ -24,9 +24,8 @@ #include #include "external/graph/ge_error_codes.h" -#include "external/graph//inference_context.h" -#include "external/graph//tensor.h" -#include "external/graph//usr_types.h" +#include "external/graph/inference_context.h" +#include "external/graph/tensor.h" #ifndef USER_GE_LOGI #define USER_GE_LOGI(...) @@ -182,9 +181,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Operator { // Bytes type graphStatus GetAttr(const string &name, OpBytes &attr_value) const; - Operator &SetAttr(const string &name, const UsrQuantizeFactorParams &attr_value); - graphStatus GetAttr(const string &name, UsrQuantizeFactorParams &attr_value) const; - Operator &SetAttr(const string &name, const std::vector> &attr_value); graphStatus GetAttr(const string &name, std::vector> &attr_value) const; @@ -235,11 +231,9 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Operator { graphStatus VerifyAll(); // Only has one output index = 0 - Operator &SetInput(const string &dst_name, uint32_t dst_index, - const Operator &src_oprt); + Operator &SetInput(const string &dst_name, uint32_t dst_index, const Operator &src_oprt); - Operator &SetInput(const string &dst_name, uint32_t dst_index, const Operator &src_oprt, - const string &name); + Operator &SetInput(const string &dst_name, uint32_t dst_index, const Operator &src_oprt, const string &name); private: Operator &SetInput(const string &dst_name, const OutHandler &out_handler); diff --git a/inc/external/graph/tensor.h b/inc/external/graph/tensor.h index a7f4a877..5224c35c 100644 --- a/inc/external/graph/tensor.h +++ b/inc/external/graph/tensor.h @@ -26,9 +26,10 @@ #include "external/graph/types.h" namespace ge { +class ShapeImpl; class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Shape { public: - Shape() = default; + Shape(); ~Shape() = default; explicit Shape(const std::vector &dims); @@ -40,7 +41,7 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Shape { int64_t GetShapeSize() const; private: - std::vector dims_; + std::shared_ptr impl_; }; class TensorDescImpl; @@ -66,10 +67,10 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY TensorDesc { void SetFormat(Format format); Shape GetOriginShape() const; - void SetOriginShape(const Shape &origin_shape); + void SetOriginShape(const Shape &originShape); Format GetOriginFormat() const; - void SetOriginFormat(Format origin_format); + void SetOriginFormat(Format originFormat); DataType GetDataType() const; void SetDataType(DataType dt); @@ -82,7 +83,7 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY TensorDesc { int64_t GetSize() const; int64_t GetRealDimCnt() const; - void SetRealDimCnt(const int64_t real_dim_cnt); + void SetRealDimCnt(const int64_t realDimCnt); private: std::shared_ptr impl; diff --git a/inc/external/graph/types.h b/inc/external/graph/types.h old mode 100755 new mode 100644 index d8c60ae0..605b6c95 --- a/inc/external/graph/types.h +++ b/inc/external/graph/types.h @@ -67,33 +67,33 @@ enum DataType { inline int GetSizeByDataType(DataType data_type) { static int data_type_size[DT_UNDEFINED] = { - 4, // DT_FLOAT = 0, float type - 2, // DT_FLOAT16 = 1, fp16 type - 1, // DT_INT8 = 2, int8 type - 4, // DT_INT32 = 3, - 1, // DT_UINT8 = 4, uint8 type - -1, - 2, // DT_INT16 = 6, int16 type - 2, // DT_UINT16 = 7, uint16 type - 4, // DT_UINT32 = 8, unsigned 
int32 - 8, // DT_INT64 = 9, int64 type - 8, // DT_UINT64 = 10, unsigned int64 - 8, // DT_DOUBLE = 11, double type - 1, // DT_BOOL = 12, bool type - -1, // DT_STRING = 13, string type - 1, // DT_DUAL_SUB_INT8 = 14, dual output int8 type - 1, // DT_DUAL_SUB_UINT8 = 15, dual output uint8 type - 8, // DT_COMPLEX64 = 16, complex64 type - 16, // DT_COMPLEX128 = 17, complex128 type - 1, // DT_QINT8 = 18, qint8 type - 2, // DT_QINT16 = 19, qint16 type - 4, // DT_QINT32 = 20, qint32 type - 1, // DT_QUINT8 = 21, quint8 type - 2, // DT_QUINT16 = 22, quint16 type - -1, // DT_RESOURCE = 23, resource type - -1, // DT_STRING_REF = 24, string ref type - 5, // DT_DUAL = 25, dual output type (float + int8) - // DT_UNDEFINED Used to indicate a DataType field has not been set. + 4, // DT_FLOAT = 0, float type + 2, // DT_FLOAT16 = 1, fp16 type + 1, // DT_INT8 = 2, int8 type + 4, // DT_INT32 = 3, + 1, // DT_UINT8 = 4, uint8 type + -1, + 2, // DT_INT16 = 6, int16 type + 2, // DT_UINT16 = 7, uint16 type + 4, // DT_UINT32 = 8, unsigned int32 + 8, // DT_INT64 = 9, int64 type + 8, // DT_UINT64 = 10, unsigned int64 + 8, // DT_DOUBLE = 11, double type + 1, // DT_BOOL = 12, bool type + -1, // DT_STRING = 13, string type + 1, // DT_DUAL_SUB_INT8 = 14, dual output int8 type + 1, // DT_DUAL_SUB_UINT8 = 15, dual output uint8 type + 8, // DT_COMPLEX64 = 16, complex64 type + 16, // DT_COMPLEX128 = 17, complex128 type + 1, // DT_QINT8 = 18, qint8 type + 2, // DT_QINT16 = 19, qint16 type + 4, // DT_QINT32 = 20, qint32 type + 1, // DT_QUINT8 = 21, quint8 type + 2, // DT_QUINT16 = 22, quint16 type + -1, // DT_RESOURCE = 23, resource type + -1, // DT_STRING_REF = 24, string ref type + 5, // DT_DUAL = 25, dual output type (float + int8) + // DT_UNDEFINED Used to indicate a DataType field has not been set. 
}; if (data_type >= DT_UNDEFINED) { return -1; @@ -152,10 +152,11 @@ enum DeviceType { CPU = 1, }; +class TensorTypeImpl; struct TensorType { - explicit TensorType(DataType dt) { dt_vec_.push_back(dt); } + explicit TensorType(DataType dt); - TensorType(const std::initializer_list &types) { dt_vec_ = types; } + TensorType(const std::initializer_list &types); static TensorType ALL() { return TensorType{DT_BOOL, DT_COMPLEX128, DT_COMPLEX64, DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT16, @@ -204,7 +205,7 @@ struct TensorType { static TensorType FLOAT() { return TensorType{DT_FLOAT, DT_FLOAT16}; } - std::vector dt_vec_; + std::shared_ptr tensor_type_impl_; }; } // namespace ge diff --git a/inc/external/register/register.h b/inc/external/register/register.h index d8a2211b..87082bee 100644 --- a/inc/external/register/register.h +++ b/inc/external/register/register.h @@ -17,7 +17,6 @@ #ifndef INC_EXTERNAL_REGISTER_REGISTER_H_ #define INC_EXTERNAL_REGISTER_REGISTER_H_ -#include #include #include #include @@ -33,12 +32,12 @@ #include "register/register_fmk_types.h" #include "register/register_types.h" -using std::unique_ptr; -using std::map; using std::make_shared; -using std::to_string; -using std::string; +using std::map; using std::pair; +using std::string; +using std::to_string; +using std::unique_ptr; using std::vector; namespace ge { @@ -46,55 +45,17 @@ class Operator; class TensorDesc; class Tensor; class TBEPluginManager; -} +} // namespace ge namespace domi { -struct OpOutput { - ge::Operator op; - // The output name of op - std::string outputName; -}; - -struct InferShapeContext { - ge::Operator op; - // Input name, input - std::map inputs; -}; - -struct InferShapeOutput { - std::vector outputDescs; - std::vector realDimCnt; -}; - -enum OmgMoveTypeToAttr { - OMG_MOVE_TYPE_DTYPE = 0, - OMG_MOVE_TYPE_VALUE, - OMG_MOVE_TYPE_SHAPE, - OMG_MOVE_TYPE_FORMAT, - OMG_MOVE_TYPE_AXIS, - OMG_MOVE_TYPE_SCALAR_VALUE, - OMG_REMOVE_TYPE_WITH_COND = 1000, -}; - -struct MoveInputToAttrStu { - int inputIdx; - std::string attrName; - OmgMoveTypeToAttr moveType; - bool attrValue; -}; - Status AutoMappingFn(const google::protobuf::Message *op_src, ge::Operator &op); Status AutoMappingFnDynamic(const google::protobuf::Message *op_src, ge::Operator &op, std::map> dynamic_name_attr_value, int in_pos = -1, int out_pos = -1); using google::protobuf::Message; +class OpRegistrationDataImpl; using ParseParamFunc = std::function; -using InferShapeFunc = std::function &)>; -using InferShapeFuncV2 = std::function; -using GetWorkspaceSizeFunc = std::function &)>; -using UpdateOpDescFunc = std::function; -using BuildTeBinFunc = std::function; class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData { public: @@ -110,64 +71,18 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData { OpRegistrationData &ParseParamsFn(const ParseParamFunc &parseParamFn); - OpRegistrationData &InferShapeAndTypeFn(const InferShapeFunc &inferShapeFn); - - OpRegistrationData &InferShapeAndTypeFn(const InferShapeFuncV2 &inferShapeFn); - - OpRegistrationData &UpdateOpDescFn(const UpdateOpDescFunc &updateOpDescFn); - - OpRegistrationData &GetWorkspaceSizeFn(const GetWorkspaceSizeFunc &getWorkspaceSizeFn); - - OpRegistrationData &TEBinBuildFn(const BuildTeBinFunc &buildTeBinFn); - OpRegistrationData &ImplyType(const domi::ImplyType &imply_type); - OpRegistrationData &Formats(const std::initializer_list &input_formats, - const std::initializer_list &output_formats); - - OpRegistrationData &WeightFormats(const 
std::initializer_list &weight_formats); - - OpRegistrationData &InputFormat(const std::initializer_list> &inputFormats); - OpRegistrationData &OutputFormat(const std::initializer_list> &outputFormats); - OpRegistrationData &InputDataType(const std::initializer_list> &inputDataTypes); - OpRegistrationData &OutputDataType(const std::initializer_list> &outputDataTypes); - OpRegistrationData &InputLimitedTensorDescInfo( - const std::initializer_list> &limitedTensorDescs); - OpRegistrationData &OutputLimitedTensorDescInfo( - const std::initializer_list> &limitedTensorDescs); - - OpRegistrationData &MoveInputToAttr(int inputIdx, const std::string &attrName, OmgMoveTypeToAttr moveType); OpRegistrationData &DelInputWithCond(int inputIdx, const std::string &attrName, bool attrValue); + domi::ImplyType GetImplyType() const; + std::string GetOmOptype() const; + std::set GetOriginOpTypeSet() const; + domi::FrameworkType GetFrameworkType() const; + ParseParamFunc GetParseParamFn() const; + private: - domi::FrameworkType fmk_type_; // Framework type - std::set ori_optype_set_; // OP type in the original model, there may be multiple - std::string om_optype_; // OP type in OM model - domi::ImplyType imply_type_; // Execution type - std::vector input_formats_; // Data formats supported by operator input - std::vector output_formats_; // Data formats supported by operator output - std::vector weight_formats_; // Data format supported by operator weight - - ParseParamFunc parseParamFn_; // ParseParam function - InferShapeFunc inferShapeFn_; // InferShape function - InferShapeFuncV2 inferShapeFnV2_; // InferShape function - GetWorkspaceSizeFunc getWorkspaceSizeFn_; // GetWorkspaceSizeFunc function - UpdateOpDescFunc updateOpDescFn_; - BuildTeBinFunc buildTeBinFn_; - // Input formats list supported by tbe operators - std::vector> supportedInputFormats_; - // Output formats list supported by tbe operators - std::vector> supportedOutputFormats_; - // Input datatypes list supported by tbe operators - std::vector> supportedInputDataTypes_; - // Output datatypes list supported by tbe operators - std::vector> supportedOutputDataTypes_; - // Input tensordesinfo list supported by tbe operator - std::vector> inputLimitedTensorDescs_; - // Output tensordesinfo list supported by tbe operator - std::vector> outputLimitedTensorDescs_; - - std::vector moveInputToAttrVec_; + std::shared_ptr impl_; friend class OpRegistry; friend class OpRegistrationTbe; friend class ge::TBEPluginManager; @@ -181,19 +96,12 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpReceiver { #define REGISTER_CUSTOM_OP(name) REGISTER_CUSTOM_OP_UNIQ_HELPER(__COUNTER__, name) #define REGISTER_CUSTOM_OP_UNIQ_HELPER(ctr, name) REGISTER_CUSTOM_OP_UNIQ(ctr, name) -#define REGISTER_CUSTOM_OP_UNIQ(ctr, name) \ - static OpReceiver register_op##ctr \ - __attribute__((unused)) = \ - OpRegistrationData(name) +#define REGISTER_CUSTOM_OP_UNIQ(ctr, name) \ + static OpReceiver register_op##ctr __attribute__((unused)) = OpRegistrationData(name) } // namespace domi namespace ge { -using OpOutput = domi::OpOutput; -using InferShapeContext = domi::InferShapeContext; -using InferShapeOutput = domi::InferShapeOutput; -using OmgMoveTypeToAttr = domi::OmgMoveTypeToAttr; -using MoveInputToAttrStu = domi::MoveInputToAttrStu; using OpRegistrationData = domi::OpRegistrationData; using OpReceiver = domi::OpReceiver; -} +} // namespace ge #endif // INC_EXTERNAL_REGISTER_REGISTER_H_ diff --git a/inc/external/register/register_fmk_types.h 
b/inc/external/register/register_fmk_types.h index e24957cc..ef469f3a 100644 --- a/inc/external/register/register_fmk_types.h +++ b/inc/external/register/register_fmk_types.h @@ -31,12 +31,6 @@ enum FrameworkType { FMK_TYPE_A_NN, FMK_TYPE_RESERVED, }; - -struct TEBinInfo { - std::string bin_file_path; - std::string json_file_path; - std::string ddk_version; -}; } // namespace domi #endif // INC_EXTERNAL_REGISTER_REGISTER_FMK_TYPES_H_ diff --git a/inc/framework/common/debug/ge_log.h b/inc/framework/common/debug/ge_log.h index 1556fd07..bdc865de 100644 --- a/inc/framework/common/debug/ge_log.h +++ b/inc/framework/common/debug/ge_log.h @@ -44,6 +44,8 @@ inline bool IsLogEnable(int module_name, int log_level) noexcept { return false; } +/*lint --emacro((773),GE_TIMESTAMP_START)*/ +/*lint -esym(773,GE_TIMESTAMP_START)*/ #define GE_TIMESTAMP_START(stage) uint64_t startUsec_##stage = ge::GetCurrentTimestap() #define GE_TIMESTAMP_END(stage, stage_name) \ diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index 1aa4111c..fb5418b0 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -29,18 +29,7 @@ using cce::CC_STATUS_SUCCESS; using cce::ccStatus_t; -#if !defined(__ANDROID__) && !defined(ANDROID) -#define DOMI_LOGE(...) DAV_LOGE("DOMI", __VA_ARGS__) -#else -#include -#if defined(BUILD_VERSION_PERF) -#define DOMI_LOGE(fmt, ...) -#else -// The Android system has strict log control. Do not modify the log. -#define DOMI_LOGE(fmt, ...) \ - __android_log_print(ANDROID_LOG_ERROR, "NPU_FMK", "%s %s(%d)::" #fmt, __FILE__, __FUNCTION__, __LINE__, ##__VA_ARGS__) -#endif -#endif +#define GE_LOGE(...) DAV_LOGE("GE", __VA_ARGS__) // ge marco #define GE_LOGI_IF(condition, ...) \ @@ -53,9 +42,9 @@ using cce::ccStatus_t; GELOGW(__VA_ARGS__); \ } -#define GE_LOGE_IF(condition, ...) \ - if ((condition)) { \ - GELOGE(ge::FAILED, __VA_ARGS__); \ +#define GE_LOGE_IF(condition, ...) \ + if ((condition)) { \ + GE_LOGE(__VA_ARGS__); \ } // If expr is not SUCCESS, print the log and return the same value @@ -63,7 +52,7 @@ using cce::ccStatus_t; do { \ const ge::Status _status = (expr); \ if (_status != ge::SUCCESS) { \ - GELOGE(ge::FAILED, __VA_ARGS__); \ + GE_LOGE(__VA_ARGS__); \ return _status; \ } \ } while (0); @@ -73,7 +62,7 @@ using cce::ccStatus_t; do { \ const ge::Status _status = (expr); \ if (_status != ge::SUCCESS) { \ - GELOGE(ge::FAILED, __VA_ARGS__); \ + GE_LOGE(__VA_ARGS__); \ } \ } while (0); @@ -102,11 +91,25 @@ using cce::ccStatus_t; (void)msg.append(ge::StringUtils::FormatString(__VA_ARGS__)); \ (void)msg.append( \ ge::StringUtils::FormatString(" Error Code:0x%X(%s)", _status, GET_ERRORNO_STR(_status).c_str())); \ - GELOGE(ge::FAILED, "%s", msg.c_str()); \ + GE_LOGE("%s", msg.c_str()); \ return _status; \ } \ } while (0); +// If expr is not true, print the Info log and return the specified status +#define GE_CHK_BOOL_RET_STATUS_LOGI(expr, _status, ...) \ + do { \ + bool b = (expr); \ + if (!b) { \ + std::string msg; \ + (void)msg.append(StringUtils::FormatString(__VA_ARGS__)); \ + (void)msg.append( \ + StringUtils::FormatString(" Check result false, status: 0x%X %s", _status, GET_ERRORNO_STR(_status).c_str())); \ + GELOGI("%s", msg.c_str()); \ + return _status; \ + } \ + } while (0); + // If expr is not true, print the log and return the specified status #define GE_CHK_BOOL_RET_STATUS_NOLOG(expr, _status, ...) 
\ do { \ bool b = (expr); \ if (!b) { \ GELOGI(__VA_ARGS__); \ return _status; \ } \ } while (0); @@ -121,7 +124,7 @@ using cce::ccStatus_t; { \ bool b = (expr); \ if (!b) { \ - GELOGE(ge::FAILED, __VA_ARGS__); \ + GE_LOGE(__VA_ARGS__); \ exec_expr; \ } \ }; @@ -145,12 +148,22 @@ using cce::ccStatus_t; } \ }; +// If expr is true, print the info log and execute a custom statement +#define GE_CHK_BOOL_TRUE_EXEC_INFO(expr, exec_expr, ...) \ + { \ + bool b = (expr); \ + if (b) { \ + GELOGI(__VA_ARGS__); \ + exec_expr; \ + } \ + }; + // If expr is true, print logs and execute custom statements #define GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(expr, exec_expr, ...) \ { \ bool b = (expr); \ if (b) { \ - GELOGE(ge::FAILED, __VA_ARGS__); \ + GE_LOGE(__VA_ARGS__); \ exec_expr; \ } \ }; @@ -164,12 +177,23 @@ using cce::ccStatus_t; } \ }; +// If expr is true, print the log, execute the expression and return +#define GE_CHK_BOOL_TRUE_RET_VOID(expr, exec_expr, ...) \ + { \ + bool b = (expr); \ + if (b) { \ + GE_LOGE(__VA_ARGS__); \ + exec_expr; \ + return; \ + } \ + }; + // If expr is not SUCCESS, print the log and execute the expression + return _status #define GE_CHK_BOOL_TRUE_EXEC_RET_STATUS(expr, _status, exec_expr, ...) \ { \ bool b = (expr); \ if (b) { \ - GELOGE(ge::FAILED, __VA_ARGS__); \ + GE_LOGE(__VA_ARGS__); \ exec_expr; \ return _status; \ } \ @@ -186,52 +210,62 @@ using cce::ccStatus_t; // -----------------runtime related macro definitions------------------------------- // If expr is not RT_ERROR_NONE, print the log -#define GE_CHK_RT(expr) \ - do { \ - rtError_t _rt_ret = (expr); \ - if (_rt_ret != RT_ERROR_NONE) { \ - GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ - } \ +#define GE_CHK_RT(expr) \ + do { \ + rtError_t _rt_ret = (expr); \ + if (_rt_ret != RT_ERROR_NONE) { \ + GE_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ + } \ } while (0); // If expr is not RT_ERROR_NONE, print the log and execute the exec_expr expression -#define GE_CHK_RT_EXEC(expr, exec_expr) \ - { \ - rtError_t _rt_ret = (expr); \ - if (_rt_ret != RT_ERROR_NONE) { \ - GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ - exec_expr; \ - } \ +#define GE_CHK_RT_EXEC(expr, exec_expr) \ + { \ + rtError_t _rt_ret = (expr); \ + if (_rt_ret != RT_ERROR_NONE) { \ + GE_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ + exec_expr; \ + } \ } // If expr is not RT_ERROR_NONE, print the log and return -#define GE_CHK_RT_RET(expr) \ - do { \ - rtError_t _rt_ret = (expr); \ - if (_rt_ret != RT_ERROR_NONE) { \ - GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ - return ge::RT_FAILED; \ - } \ +#define GE_CHK_RT_RET(expr) \ + do { \ + rtError_t _rt_ret = (expr); \ + if (_rt_ret != RT_ERROR_NONE) { \ + GE_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ + return ge::RT_FAILED; \ + } \ } while (0); // ------------------------cce related macro definitions---------------------------- // If expr is not CC_STATUS_SUCCESS, print the log -#define GE_CHK_CCE(expr) \ - do { \ - ccStatus_t _cc_ret = (expr); \ - if (_cc_ret != CC_STATUS_SUCCESS) { \ - GELOGE(ge::CCE_FAILED, "Call cce api failed, ret: 0x%X", _cc_ret); \ - } \ +#define GE_CHK_CCE(expr) \ + do { \ + ccStatus_t _cc_ret = (expr); \ + if (_cc_ret != CC_STATUS_SUCCESS) { \ + GE_LOGE("Call cce api failed, ret: 0x%X", _cc_ret); \ + } \ + } while (0); + +// If expr is not CC_STATUS_SUCCESS, print the log and execute the exec_expr expression +#define GE_CHK_CCE_EXEC(expr, exec_expr) \ + do { \ + ccStatus_t _cc_ret = (expr); \ + if (_cc_ret != CC_STATUS_SUCCESS) { \ + GE_LOGE("Call cce api failed, ret: 0x%X", _cc_ret); \ +
exec_expr; \ } \ } while (0); // If expr is not CC_STATUS_SUCCESS, print the log and return -#define GE_CHK_CCE_RET(expr) \ - do { \ - ccStatus_t _cc_ret = (expr); \ - if (_cc_ret != CC_STATUS_SUCCESS) { \ - GELOGE(ge::CCE_FAILED, "Call cce api failed, ret: 0x%X", _cc_ret); \ - return ge::CCE_FAILED; \ - } \ +#define GE_CHK_CCE_RET(expr) \ + do { \ + ccStatus_t _cc_ret = (expr); \ + if (_cc_ret != CC_STATUS_SUCCESS) { \ + GE_LOGE("Call cce api failed, ret: 0x%X", _cc_ret); \ + return ge::CCE_FAILED; \ + } \ } while (0); // If expr is true, execute exec_expr without printing logs @@ -247,8 +281,37 @@ using cce::ccStatus_t; try { \ exec_expr0; \ } catch (const std::bad_alloc &) { \ - GELOGE(ge::FAILED, "Make shared failed"); \ + GE_LOGE("Make shared failed"); \ exec_expr1; \ } +#define GE_CHECK_INT32_MUL_OVERFLOW(a, b, ...) \ + do { \ + if ((a) > 0) { \ + if ((b) > 0) { \ + if ((a) > (INT32_MAX / (b))) { \ + GE_LOGE(__VA_ARGS__); \ + return ge::FAILED; \ + } \ + } else { \ + if ((b) < (INT32_MIN / (a))) { \ + GE_LOGE(__VA_ARGS__); \ + return ge::FAILED; \ + } \ + } \ + } else { \ + if ((b) > 0) { \ + if ((a) < (INT32_MIN / (b))) { \ + GE_LOGE(__VA_ARGS__); \ + return ge::FAILED; \ + } \ + } else { \ + if (((a) != 0) && ((b) < (INT32_MAX / (a)))) { \ + GE_LOGE(__VA_ARGS__); \ + return ge::FAILED; \ + } \ + } \ + } \ + } while (0); + #endif // INC_FRAMEWORK_COMMON_DEBUG_LOG_H_ diff --git a/inc/framework/common/fmk_error_codes.h b/inc/framework/common/fmk_error_codes.h index 289cbd68..ec1f26d0 100644 --- a/inc/framework/common/fmk_error_codes.h +++ b/inc/framework/common/fmk_error_codes.h @@ -1,4 +1,4 @@ -/** +/** * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/inc/framework/common/fmk_types.h b/inc/framework/common/fmk_types.h index e7ab6d6e..f84390da 100644 --- a/inc/framework/common/fmk_types.h +++ b/inc/framework/common/fmk_types.h @@ -1,4 +1,4 @@ -/** +/** * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,4 +20,4 @@ #include "graph/types.h" #include "register/register_types.h" -#endif // INC_FRAMEWORK_COMMON_FMK_TYPES_H_ +#endif // INC_FRAMEWORK_COMMON_FMK_TYPES_H_ diff --git a/inc/framework/common/ge_inner_error_codes.h b/inc/framework/common/ge_inner_error_codes.h index 123aafdf..c6bfc576 100644 --- a/inc/framework/common/ge_inner_error_codes.h +++ b/inc/framework/common/ge_inner_error_codes.h @@ -14,79 +14,78 @@ * limitations under the License.
*/ +/*lint -e* */ #ifndef INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ #define INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ #include #include - #include "ge/ge_api_error_codes.h" namespace ge { // System ID -enum SystemIdType { kSysidGE = 8 }; - +enum SystemIdType { SYSID_GE = 8 }; // Runtime location enum LogRuntime { - KRtHost = 0b01, - kRtDevice = 0b10, + RT_HOST = 0b01, + RT_DEVICE = 0b10, }; // Sub model enum SubModuleId { - kCommonModule = 0, - kClientModule = 1, - kInitModule = 2, - kSessionModule = 3, - kGraphModule = 4, - kEngineMOdule = 5, - kOpsModule = 6, - kPluginModule = 7, - kRuntimeModule = 8, - kExecutorModule = 9, - kGeneratorModule = 10, + COMMON_MODULE = 0, + CLIENT_MODULE = 1, + INIT_MODULE = 2, + SESSION_MODULE = 3, + GRAPH_MODULE = 4, + ENGINE_MODULE = 5, + OPS_MODULE = 6, + PLUGIN_MODULE = 7, + RUNTIME_MODULE = 8, + EXECUTOR_MODULE = 9, + GENERATOR_MODULE = 10, }; // Error code type enum ErrorCodeType { - kErrorCode = 0b01, - kExceptionCode = 0b10, + ERROR_CODE = 0b01, + EXCEPTION_CODE = 0b10, }; // Error level enum ErrorLevel { - kCommonLevel = 0b000, - kSuggestionLevel = 0b001, - kMinorLevel = 0b010, - kMajorLevel = 0b011, - kCriticalLevel = 0b100, + COMMON_LEVEL = 0b000, + SUGGESTION_LEVEL = 0b001, + MINOR_LEVEL = 0b010, + MAJOR_LEVEL = 0b011, + CRITICAL_LEVEL = 0b100, }; -// The error code is defined by the following macros +// Each module defines error codes using the following macros #define GE_ERRORNO_COMMON(name, value, desc) \ - GE_ERRORNO(KRtHost, kErrorCode, kCommonLevel, kSysidGE, kCommonModule, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, COMMON_MODULE, name, value, desc) #define GE_ERRORNO_CLIENT(name, value, desc) \ - GE_ERRORNO(KRtHost, kErrorCode, kCommonLevel, kSysidGE, kClientModule, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, CLIENT_MODULE, name, value, desc) #define GE_ERRORNO_INIT(name, value, desc) \ - GE_ERRORNO(KRtHost, kErrorCode, kCommonLevel, kSysidGE, kInitModule, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, INIT_MODULE, name, value, desc) #define GE_ERRORNO_SESSION(name, value, desc) \ - GE_ERRORNO(KRtHost, kErrorCode, kCommonLevel, kSysidGE, kSessionModule, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, SESSION_MODULE, name, value, desc) #define GE_ERRORNO_GRAPH(name, value, desc) \ - GE_ERRORNO(KRtHost, kErrorCode, kCommonLevel, kSysidGE, kGraphModule, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GRAPH_MODULE, name, value, desc) #define GE_ERRORNO_ENGINE(name, value, desc) \ - GE_ERRORNO(KRtHost, kErrorCode, kCommonLevel, kSysidGE, kEngineMOdule, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, ENGINE_MODULE, name, value, desc) #define GE_ERRORNO_OPS(name, value, desc) \ - GE_ERRORNO(KRtHost, kErrorCode, kCommonLevel, kSysidGE, kOpsModule, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, OPS_MODULE, name, value, desc) #define GE_ERRORNO_PLUGIN(name, value, desc) \ - GE_ERRORNO(KRtHost, kErrorCode, kCommonLevel, kSysidGE, kPluginModule, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, PLUGIN_MODULE, name, value, desc) #define GE_ERRORNO_RUNTIME(name, value, desc) \ - GE_ERRORNO(KRtHost, kErrorCode, kCommonLevel, kSysidGE, kRuntimeModule, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, RUNTIME_MODULE, name, value, desc) #define GE_ERRORNO_EXECUTOR(name, value, desc) \ - 
GE_ERRORNO(kRtDevice, kErrorCode, kCommonLevel, kSysidGE, kExecutorModule, name, value, desc) + GE_ERRORNO(RT_DEVICE, ERROR_CODE, COMMON_LEVEL, SYSID_GE, EXECUTOR_MODULE, name, value, desc) #define GE_ERRORNO_GENERATOR(name, value, desc) \ - GE_ERRORNO(KRtHost, kErrorCode, kCommonLevel, kSysidGE, kGeneratorModule, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GENERATOR_MODULE, name, value, desc) -// Get the description of the error code +// Get error code description #define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value) // Common module error code definition @@ -206,10 +205,9 @@ GE_ERRORNO_GRAPH(GE_GRAPH_NODE_SEARCHER_GET_GRAPH_REBUILD_FAILED, 60, GE_ERRORNO_GRAPH(GE_GRAPH_NODE_SEARCHER_SET_GRAPH_FINISH_REBUILD_GRAPH_FAILED, 61, "Failed set graph finish rebuild in node searcher."); // 1343242301 GE_ERRORNO_GRAPH(GE_GRAPH_VARIABLE_OP_PASS_FAILED, 62, "Failed to run variable pass."); // 1343242302 - // Optimize error code -GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted."); // 1343242303 -GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed."); // 1343242304 +GE_ERRORNO_GRAPH(TO_BE_DELETED, 200, "The node of the graph to be deleted."); +GE_ERRORNO_GRAPH(NOT_CHANGED, 201, "The node of the graph is not changed."); // Engine_manager module error code definition GE_ERRORNO_ENGINE(GE_ENG_INIT_FAILED, 0, "Failed to initialize engine."); // 1343246336
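// Usage sketch for the renamed error-code machinery above; illustrative only, not part of the patch.
// GE_ERRORNO_GRAPH(name, value, desc) expands to GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE,
// GRAPH_MODULE, name, value, desc), so each generated ge::Status constant packs runtime location, code
// type, error level, system ID and module ID around `value` -- hence decimal comments such as // 1343242302.
// CheckGraphStatus is a hypothetical caller, not a function from this patch.
ge::Status CheckGraphStatus(ge::Status ret) {
  if (ret == ge::TO_BE_DELETED || ret == ge::NOT_CHANGED) {
    return ret;  // optimize-pass signals defined above, not hard failures
  }
  if (ret != ge::SUCCESS) {
    // GE_GET_ERRORNO_STR resolves a packed code back to its registered description string.
    GELOGE(ret, "Graph stage failed: %s", GE_GET_ERRORNO_STR(ret).c_str());
  }
  return ret;
}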
diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h old mode 100755 new mode 100644 index 28c7a811..c3a26c49 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -137,7 +137,7 @@ class ModelListener { struct Options { int64_t session_id; int32_t device_id; - int64_t job_id; + std::string job_id; bool isUseHcom; bool deployMode; bool isAICPUMode; @@ -149,5 +149,4 @@ struct Options { int32_t physical_device_id; }; } // namespace ge - #endif // INC_FRAMEWORK_COMMON_GE_TYPES_H_ diff --git a/inc/framework/common/gflags_util.h b/inc/framework/common/gflags_util.h index 33a807bc..94d66ffb 100644 --- a/inc/framework/common/gflags_util.h +++ b/inc/framework/common/gflags_util.h @@ -23,11 +23,6 @@ namespace ge { class GflagsUtils { public: - /// - /// @brief Determines whether the parameter is true - /// @param name name parameter name - /// @return true or false - /// static bool IsSetCommandTrue(const char *name) { std::string out; return gflags::GetCommandLineOption(name, &out) && out == "true"; diff --git a/inc/framework/common/helper/model_helper.h b/inc/framework/common/helper/model_helper.h old mode 100755 new mode 100644 index 2d17779f..6513265f --- a/inc/framework/common/helper/model_helper.h +++ b/inc/framework/common/helper/model_helper.h @@ -19,6 +19,7 @@ #include #include +#include #include "common/fmk_types.h" #include "common/helper/om_file_helper.h" diff --git a/inc/framework/common/helper/om_file_helper.h b/inc/framework/common/helper/om_file_helper.h old mode 100755 new mode 100644 index 2fa06967..1341243b --- a/inc/framework/common/helper/om_file_helper.h +++ b/inc/framework/common/helper/om_file_helper.h @@ -35,8 +35,8 @@ struct ModelPartition { }; struct OmFileContext { - vector partition_datas_; - vector partition_table_; + std::vector partition_datas_; + std::vector partition_table_; uint32_t model_data_len_; }; @@ -78,7 +78,7 @@ class OmFileSaveHelper { Status AddPartition(ModelPartition &partition); - const vector &GetModelPartitions() const; + const std::vector &GetModelPartitions() const; Status SaveModel(const SaveParam &save_param, const char *target_file); @@ -88,4 +88,5 @@ class OmFileSaveHelper { OmFileContext context_; }; } // namespace ge +/*lint +e148*/ #endif // INC_FRAMEWORK_COMMON_HELPER_OM_FILE_HELPER_H_ diff --git a/inc/framework/common/l2_cache_optimize.h b/inc/framework/common/l2_cache_optimize.h index 2fe70179..a68ebcf2 100644 --- a/inc/framework/common/l2_cache_optimize.h +++ b/inc/framework/common/l2_cache_optimize.h @@ -28,11 +28,14 @@ #include "common/util.h" #include "graph/compute_graph.h" +using std::vector; + namespace ge { // Size of RC memory alignment, 2M -const size_t ALIGN_SIZE = 2097152; -const uint32_t RC_VALUE_DEFAULT = 1; -const uint32_t RC_VALUE_MAC = 32; +constexpr size_t ALIGN_SIZE = 2097152; + +constexpr uint32_t RC_VALUE_DEFAULT = 1; +constexpr uint32_t RC_VALUE_MAX = 32; // RC data type classification enum RCType { @@ -100,7 +103,7 @@ class L2CacheOptimize { void HandOPoutput(ge::NodePtr node, vector &outputList, vector &blocks); // Greatest common divisor (Euclidean algorithm) - uint32_t Measure(uint32_t x, uint32_t y) const { + uint32_t Measure(uint32_t x, uint32_t y) { if (x == 0 || y == 0) return RC_VALUE_DEFAULT; uint32_t z = y; while (x % y != 0) { diff --git a/inc/framework/common/op/attr_define.h b/inc/framework/common/op/attr_define.h new file mode 100644 index 00000000..1e2c3ab4 --- /dev/null +++ b/inc/framework/common/op/attr_define.h @@ -0,0 +1,806 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef INC_FRAMEWORK_COMMON_OP_ATTR_DEFINE_H_ +#define INC_FRAMEWORK_COMMON_OP_ATTR_DEFINE_H_ + +#include +#include "framework/common/fmk_types.h" + +namespace domi { +// Public Attribute +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_NAME; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_TYPE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_WEIGHT_NAME; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_IS_QUANTIZE_FACTOR; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_ALPHA; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_BETA; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_PADMODE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_PADMODES; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_MODE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_FILTER; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_BIAS; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_BIAS_TERM; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_HAS_BIAS_VALUE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_PAD; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_PADS; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_PAD_SIZE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_PAD_MODE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_SCALE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_WINDOWS; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_GLOBAL_POOLING; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_CEIL_MODE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_STRIDE_SIZE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_RELU_FLAG; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_ALGO; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_FORMAT; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_FILTER_FORMAT; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_LRN_K; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_LRN_NORM_REGION; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_LRN_LOCAL_SIZE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_LRN_ALPHA; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_LRN_BETA; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_AXIS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_BROADCAST; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_OUTPUT_NUM; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const 
std::string ATTR_NAME_TIDX; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_TPADDINGS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_IMG_H; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_IMG_W; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NET_H; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NET_W; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_TMULTIPLES; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_MULTIPLES; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_T; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_N; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_TSHAPE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_NAN_OPT; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_AIPP; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string NEW_AIPP_CONV_OP; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_SESSION_GRAPH_ID; + +static const std::string ATTR_NAME_INPUT_DATATYPE = "input_datatype"; +static const std::string ATTR_NAME_OUTPUT_DATATYPE = "output_datatype"; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_MULTISHAPE_BATCHLIST; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_MULTISHAPE_BATCHLIST_SIZE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_MODEL_BATCH_NUM; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_INPUT_FORMAT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_OUTPUT_FORMAT; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_FRAMEWORK_NODE_DEF; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_FRAMEWORK_OP_DEF; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_FRAMEWORK_FWK_TYPE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_FRAMEWORK_FUNC_DEF; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_INPUT_TENSOR_DESC; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_OUTPUT_TENSOR_DESC; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_INFERRED_FORMAT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_PRED_PERMUTE_DELETED; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_IGNORE_PRED_FORMAT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_WEIGHTS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_BROACAST_REAL_DIM_CNT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_DIM_ALIGN; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string 
ATTR_NAME_RTSWITCH_RECV_EVENT_ID; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_AUTOMIC_ADD_START; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE; + +// To be deleted +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_TO_BE_DELETED; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PERMUTE_RESHAPE_FUSION; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PERMUTE_RESHAPE_FUSION_CONV_PROPOSAL; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PERMUTE_RESHAPE_FUSION_CONV_DECODEBBOX; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PERMUTE_RESHAPE_FUSION_BOX_TYPE_NUM; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_MBOX_LOC_FUSION; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_MBOX_CONF_FUSION; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_MBOX_OCR_FUSION; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_MBOX_FUSION_BOX_TYPE_NUM; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_RESHAPE_SLICE_CONCAT_FUSION; + +// Refinedet +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string REFINEDET_MBOX_LOC_FUSION; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string REFINEDET_MBOX_CONF_FUSION; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string REFINEDET_MBOX_FUSION_BOX_TYPE_NUM; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string REFINEDET_RESHAPE_SLICE_CONCAT_FUSION; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_PRIORBOX_CONCAT; + +// _Arg +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_INDEX; +// _RetVal +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RETVAL_ATTR_NAME_INDEX; +// Data +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DATA_ATTR_NAME_DATA_TYPE; + +// Send +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SEND_ATTR_EVENT_ID; + +// Recv +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RECV_ATTR_EVENT_ID; + +// convolution +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_COEF; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_STRIDE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_STRIDES; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_DILATION; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_DILATIONS; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_ATTR_NAME_MODE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_ATTR_NAME_ALGO; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_ATTR_NAME_GROUP; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_ATTR_NAME_PAD_MODE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_ATTR_NAME_PAD; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_ATTR_NAME_STRIDE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string 
CONV_ATTR_NAME_DILATION; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_ATTR_NAME_NUM_OUTPUT; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_ATTR_NAME_KERNEL; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_ATTR_NAME_FILTER; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_ATTR_NAME_BIAS; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_ATTR_NAME_RELU_FLAG; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_ATTR_NAME_ADJ; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_ATTR_NAME_TARGET_SHAPE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_ATTR_NAME_BEFORE_PAD; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_ATTR_NAME_HAS_BIAS; + +// Pooling +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOLING_ATTR_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOLING_ATTR_NAN_OPT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOLING_ATTR_PAD_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOLING_ATTR_GLOBAL_POOLING; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOLING_ATTR_WINDOW; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOLING_ATTR_PAD; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOLING_ATTR_STRIDE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOLING_ATTR_CEIL_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOLING_ATTR_DATA_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOLING_ATTR_BEFORE_PAD; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOLING_ATTR_NAME_ALGO; + +// Eltwise +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ELTWISE_ATTR_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ELTWISE_ATTR_COEFF; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ELTWISE_ATTR_WEIGHT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ELTWISE_ATTR_RELU_FLAG; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ELTWISE_ATTR_ALPHA; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ELTWISE_ATTR_BETA; + +// BatchNorm +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string BATCHNORM_ATTR_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string BATCHNORM_ATTR_EPSILON; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string BATCHNORM_ATTR_USE_GLOBAL_STATS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string BATCHNORM_ATTR_MOVING_AVERAGE_FRACTION; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string BATCHNORM_ATTR_ESTIMATED_MEAN; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string BATCHNORM_ATTR_ESTIMATED_VARIANCE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string BATCHNORM_ATTR_SCALE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string BATCHNORM_ATTR_BIAS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string 
BATCHNORM_ATTR_DATA_FORMAT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string BATCHNORM_ATTR_IS_TRAINING; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string BATCHNORM_ATTR_IS_TRAINING_FUSION; + +// Huberloss +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string HUBER_LOSS_ATTR_DELTA; + +// SSDRealDivTileMul +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_REAL_DIV_TILE_MUL_ATTR_TILE_PARA; + +// SSDSumMulRealDivMean +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string + SSD_SUM_MUL_REALDIV_MEAN_ATTR_REDUCTION_INDICES; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_SUM_MUL_REALDIV_MEAN_ATTR_AXIS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_SUM_MUL_REALDIV_MEAN_ATTR_MEAN_PARA; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_SUM_MUL_REALDIV_MEAN_ATTR_HAS_SUM; + +/// ConcatFive2Four +/// ConcatFour2Five +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_CLASS_NUM; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_FEATURE_MAP_SIZE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string TRANS_FOR_LOSS_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_BOX_TYPE_NUM; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_FEATURE_MAP_HIGH; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_FEATURE_MAP_WIDTH; + +// Scale +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SCALE_ATTR_SCALE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SCALE_ATTR_BIAS; + +// FullConnection +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FULL_CONNECTION_ATTR_FILTER; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FULL_CONNECTION_ATTR_BIAS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FULL_CONNECTION_ATTR_NUM_OUTPUT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FULL_CONNECTION_ATTR_RELU_FLAG; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FULL_ATTR_NAME_ALGO; + +// SoftmaxOpParams +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SOFTMAX_ATTR_ALGO; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SOFTMAX_ATTR_MODE; + +// SparseSoftmaxCrossEntropy +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SPARSE_SOFTMAX_CROSS_ENTROPY_ATTR_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SPARSE_SOFTMAX_CROSS_ENTROPY_IS_GRAD; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SOFTMAX_CROSS_ENTROPY_LABELSMOOTHING; + +// Activation +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ACTIVATION_ATTR_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ACTIVATION_ATTR_COEF; + +// Concat +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONCAT_ATTR_NAME_AXIS; + +// Const +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONST_ATTR_NAME_DATA_TRANSTYPE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONST_ATTR_NAME_OUTPUT_FORMAT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string 
CONST_ATTR_NAME_OUTPUT_TYPE; + +// Roipooling +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ROIPOOLING_ATTR_NAME_POOLED_H; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ROIPOOLING_ATTR_NAME_POOLED_W; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ROIPOOLING_ATTR_NAME_SPATIAL_SCALE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ROIPOOLING_ATTR_NAME_RIO_POOLING_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ROIPOOLING_ATTR_NAME_POOLING_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ROIPOOLING_ATTR_NAME_SAMPLING_RATIO; + +// DetectionOutput +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_NUM_CLASSES; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_OCR_NUM_CLASSES; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_NMS_THRESHOLD; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_TOP_K; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_CONFIDENCE_THRESHOLD; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_IMG_H; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_IMG_W; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_BATCH_SIZE; +// Ssd DetectionOutput +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_ETA; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_SHARED_LOCATION; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_BACKGROUND_LABEL_ID; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_CODE_TYPE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string + DETECTIONOUTPUT_ATTR_VARIANCE_ENCODED_IN_TARGET; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_KEEP_TOP_K; +// Refinedet DetectionOutput +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_OBJECTNESS_SCORE; +// yolo DetectionOutput +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_ClASSES; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_BIASES; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_RELATIVE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_OBJECTNESS_THRESHOLD; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_CLASS_THRESHOLD; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_POST_TOP_K; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_IOU_THRESHOLD_DECAY; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_COOR_SCALE_FACTOR; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DETECTIONOUTPUT_ATTR_YOLO_VERSION; + +// DetectionPostprocess +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const 
std::string POSTPROCESS_ATTR_NAME_CLS_NUM; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POSTPROCESS_ATTR_NAME_CONF_THRESH; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POSTPROCESS_ATTR_NAME_NMS_THRESH; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POSTPROCESS_ATTR_POST_NMS_TOPN; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POSTPROCESS_ATTR_NAME_BBOX_REG_WEIGHT; + +// Spatialtransform +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SPTIALTF_ATTR_NAME_OUTPUT_H; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SPTIALTF_ATTR_NAME_OUTPUT_W; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SPTIALTF_ATTR_NAME_BORDER_VALUE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SPTIALTF_ATTR_NAME_AFFINE_TRANSFORM; + +// Proposal +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PROPOSAL_ATTR_NAME_FEAT_STRIDE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PROPOSAL_ATTR_NAME_BASE_SIZE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PROPOSAL_ATTR_NAME_MIN_SIZE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PROPOSAL_ATTR_NAME_RATIO; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PROPOSAL_ATTR_NAME_SCALE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PROPOSAL_ATTR_NAME_PRE_NMS_TOPN; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PROPOSAL_ATTR_NAME_POST_NMS_TOPN; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PROPOSAL_ATTR_NAME_NMS_THRESH; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PROPOSAL_ATTR_NAME_TOP_SIZE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PROPOSAL_ATTR_IMG_H; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PROPOSAL_ATTR_IMG_W; +// Softmax +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SOFTMAX_ATTR_AXIS; + +// Permute +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PERMUTE_ATTR_ORDER; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PERMUTE_ATTR_PERM; + +// SSD Normalize +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSDNORMALIZE_ATTR_ACCROSS_SPATIAL; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSDNORMALIZE_ATTR_CHANNEL_SHARED; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSDNORMALIZE_ATTR_EPS; + +// Flatten +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FLATTEN_ATTR_AXIS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FLATTEN_ATTR_END_AXIS; + +// SsdPRIORBOX +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_PRIOR_BOX_ATTR_FLIP; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_PRIOR_BOX_ATTR_CLIP; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_PRIOR_BOX_ATTR_IMG_H; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_PRIOR_BOX_ATTR_IMG_W; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_PRIOR_BOX_ATTR_STEP_H; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string
SSD_PRIOR_BOX_ATTR_STEP_W; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_PRIOR_BOX_ATTR_OFFSET; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_PRIOR_BOX_ATTR_MIN_SIZE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_PRIOR_BOX_ATTR_MAX_SIZE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_PRIOR_BOX_ATTR_MIN_SIZE_NUM; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_PRIOR_BOX_ATTR_MAX_SIZE_NUM; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_PRIOR_BOX_ATTR_ASPECT_RATIO; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_PRIOR_BOX_ATTR_ASPECT_RATIO_NUM; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_PRIOR_BOX_ATTR_VARIANCE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_PRIOR_BOX_ATTR_VARIANCE_NUM; + +// RefinedetPRIORBOX +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string REFINEDET_PRIOR_BOX_ATTR_VARIANCE_NUM; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string REFINEDET_PRIOR_BOX_ATTR_VARIANCE; +// PRelu +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PRELU_ATTR_CHANNEL_SHARED; + +// Psroi pooling +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PSROIPOOLING_ATTR_SPATIAL_SCALE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PSROIPOOLING_ATTR_OUTPUT_DIM; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PSROIPOOLING_ATTR_GROUP_SIZE; + +// Power +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POWER_ATTR_NAME_POWER; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POWER_ATTR_NAME_SCALE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POWER_ATTR_NAME_SHIFT; + +// Log +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string LOG_ATTR_NAME_SCALE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string LOG_ATTR_NAME_SHIFT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string LOG_ATTR_NAME_BASE; + +// Pack +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PACK_ATTR_NAME_NUM; + +// Dynamic stitch +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DYNAMIC_STITCH_ATTR_NAME_NUM; + +// Unpack +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string UNPACK_ATTR_NAME_NUM; +// Gathernd +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string GATHERND_ATTR_NAME_TINDICES; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string GATHERND_ATTR_NAME_TPARAMS; + +// Argmax +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ARGMAX_ATTR_NAME_TOPK; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ARGMAX_ATTR_NAME_REDUCESIZE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ARGMAX_ATTR_NAME_REDUCESTRIDE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ARGMAX_ATTR_NAME_OUTMAX; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ARGMAX_ATTR_NAME_AXIS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ARGMAX_ATTR_NAME_AXISTYPE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string 
ARGMAX_ATTR_NAME_KEEPDIMS; + +// Upsample +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string UPSAMPLE_ATTR_NAME_SCALE_H; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string UPSAMPLE_ATTR_NAME_SCALE_W; + +// Relu +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_NEGATIVE_SLOPE; + +// FreeSpaceExtract +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FREESPACEEXTRACT_ATTR_NAME_ORG_HEIGHT; + +// split +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SPLIT_ATTR_NAME_SLICE_POINT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SPLIT_ATTR_NAME_SIZE_SPLIT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SPLIT_ATTR_NAME_NUM_SPLIT; + +// Tvm +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string TVM_ATTR_NAME_MAGIC; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string TVM_ATTR_NAME_BLOCKDIM; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string TVM_ATTR_NAME_METADATA; + +// Squeeze +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SQUEEZE_ATTR_AXIS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SQUEEZE_ATTR_DIMS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SQUEEZE_OP_NAME; + +// Stride slice +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string STRIDE_SLICE_ATTR_BEGIN_MASK; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string STRIDE_SLICE_ATTR_END_MASK; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string STRIDE_SLICE_ATTR_ELLIPSIS_MASK; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string STRIDE_SLICE_ATTR_NEW_AXIS_MASK; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK; + +// Slice +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SLICE_ATTR_NAME_BEGINS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SLICE_ATTR_NAME_SIZES; + +// Roialign +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ROIALIGN_ATTR_SPATIAL_SCALE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ROIALIGN_ATTR_SAMPLING_RATIO; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ROIALIGN_ATTR_NAME_POOLED_H; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ROIALIGN_ATTR_NAME_POOLED_W; + +// Generate_rpn_proposal +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string GENERATE_RPN_PROPOSAL_ATTR_PRE_NMS_TOPK; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string GENERATE_RPN_PROPOSAL_ATTR_POST_NMS_TOPK; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string GENERATE_RPN_PROPOSAL_ATTR_RPN_MINI_SIZE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string + GENERATE_RPN_PROPOSAL_ATTR_RPN_PROPOSAL_NMS_THRESH; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string + GENERATE_RPN_PROPOSAL_ATTR_RPN_PROPOSAL_FILTER_THRESH; +// Decode_bbox +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DECODE_BBOX_ATTR_DECODECLIP; + +// Cast +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CAST_ATTR_DSTT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string 
CAST_ATTR_SRCT; + +// Fastrcnn predictions +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FASTRCNN_PREDICTIONS_ATTR_TOPK; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FASTRCNN_PREDICTIONS_ATTR_SCORE_THRESHOLD; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FASTRCNN_PREDICTIONS_ATTR_NMS_THRESHOLD; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FASTRCNN_PREDICTIONS_ATTR_NUM_CLASSES; + +// REORG +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string REORG_ATTR_STRIDE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string REORG_ATTR_REVERSE; + +// MERGE +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string MERGE_DEAD_INDEX; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string MERGE_PRENODE_FLAG; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string TO_BE_OUTPUT; +static const std::string NOT_NET_OUTPUT = "not_net_output"; + +// Concatv2 +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONCAT_V2_ATTR_TIDX; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONCAT_V2_ATTR_N; +// SUM +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SUM_ATTR_TIDX; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SUM_ATTR_AXIS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SUM_ATTR_KEEP_DIMS; + +// ResizeBilinear +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESIZE_BILINEAR_ATTR_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESIZE_BILINEAR_ATTR_ALIGN_CORNERS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESIZE_BILINEAR_ATTR_HEIGHT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESIZE_BILINEAR_ATTR_WIDTH; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESIZE_BILINEAR_ATTR_ZOOM_FACTOR; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESIZE_BILINEAR_ATTR_SHRINK_FACTOR; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESIZE_BILINEAR_ATTR_PAD_BEGIN; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESIZE_BILINEAR_ATTR_PAD_END; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESIZE_BILINEAR_ATTR_ALPHA; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESIZE_BILINEAR_ATTR_BETA; + +// RetinaNet +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RETINANET_FILTER_BACKGROUND_TRUE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RETINANET_ANCHOR_FUSION; + +// MatMul +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string MATMUL_TRANSPOSE_X; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string MATMUL_TRANSPOSE_W; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string MATMUL_HAS_BIAS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string MATMUL_ATTR_IS_TRAINING; + +// Flatten +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FLATTEN_START_AXIS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FLATTEN_END_AXIS; + +// Reshape +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESHAPE_ATTR_AXIS; +extern
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESHAPE_ATTR_NUM_AXES; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESHAPE_ATTR_FORMAT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESHAPE_ATTR_SHAPE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESHAPE_ATTR_ALPHA; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESHAPE_ATTR_BETA; + +// FrameworkOp +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string T_IN_DATATYPE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string T_OUT_DATATYPE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_OUT_N; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_OUT_C; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_OUT_H; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_OUT_W; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_PAD_DEPTH_CONV; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_PAD_CONV; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_BEFORE_PAD; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ANN_MEAN_KEEPDIMS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PAD_ATTR_PADDINGDS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PAD_ATTR_CONSTANT_VALUE; + +// ConvGradFilter +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_GRAD_FILTER_OUTPUT_SHAPE; +// ConvGradInput +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CONV_GRAD_INPUT_OUTPUT_SHAPE; + +// Rnn +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RNN_MODE_STATIC; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string MUTI_RNN; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CELL_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string CNN_RNN; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string LSTM_CELL; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string GRU_CELL; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RNN_HT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RNN_XT_HT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RNN_BATCH_SIZE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string LSTM_CELL_CLIP; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string LSTM_PROJ_CLIP; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string LSTM_ACTIVATE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string LSTM_OUT_MAP; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string LSTM_OUT_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string LSTM_STATE_OUT_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string LSTM_TIME_MAJOR; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string LSTM_IS_INPUT_PRE_PROCESS; + +// Upsample +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string UPSAMPLE_ATTR_NAME_SCALE; + +// PadV2 +extern FMK_FUNC_HOST_VISIBILITY
FMK_FUNC_DEV_VISIBILITY const std::string PADV2_ATTR_NAME_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PADV2_ATTR_NAME_PADS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PADV2_ATTR_NAME_T; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PADV2_ATTR_NAME_PAD_FORMAT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PADV2_ATTR_NAME_CONST_VALUE; + +// MirrorPad +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string MIRRORPAD_ATTR_NAME_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string MIRRORPAD_ATTR_NAME_PADS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string MIRRORPAD_ATTR_NAME_PAD_FORMAT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string MIRRORPAD_ATTR_NAME_CONST_VALUE; + +// Filler +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FILLER_TYPE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FILLER_VALUE; + +// Shufflechannel +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SHUFFLE_CHANNEL_GROUP; + +// TopKV2 +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string TOPKV2_ATTR_K; + +// Calibration +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string STRIDE_H_INDEX; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string STRIDE_W_INDEX; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PAD_TOP_INDEX; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PAD_BOTTOM_INDEX; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PAD_RIGHT_INDEX; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string PAD_LEFT_INDEX; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_IS_CONST; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_GROUP; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_DILATION_SIZE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_EPSILON; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_POOLING_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_CLASS_NUM; + +// model +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_MODEL_TARGET_TYPE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_MODEL_STREAM_NUM; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_MODEL_EVENT_NUM; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_MODEL_MEMORY_SIZE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_MODEL_WEIGHT_SIZE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_MODEL_TASK_GEN_BASE_ADDR; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_MODEL_TASK_GEN_WEIGHT_ADDR; + +// Public Attribute +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_IMPLY_TYPE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_BYTE_SIZE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_FUSION_INFERENCE_ID; + +extern FMK_FUNC_HOST_VISIBILITY
FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_FUSION_OPDEF; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_FUSION_SCOPE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_OPATTR; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_RELUFLAG; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_SEQLEN_INDEX; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_X_INDEX; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_CONT_INDEX; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_XSTATIC_INDEX; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string TARGET_TYPE_MINI; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string TARGET_TYPE_TINY; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string TARGET_TYPE_LITE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_STREAM_LABEL; + +// L2_normalize +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string L2_NORMALIZE_ATTR_AXIS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string L2_NORMALIZE_ATTR_EPS; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOL_PARAMA_ATTR_WINDOW; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOL_PARAMA_ATTR_CEIL_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOL_PARAMA_ATTR_DATA_MODE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOL_PARAMA_ATTR_GLOBAL_POOLING; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOL_PARAMA_ATTR_NAN_OP; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string POOL_PARAMA_ATTR_PAD_MOD; +// HCOM +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string HCOM_ATTR_ROOT_RANK; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string HCOM_ATTR_REDUCE_TYPE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string HCOM_ATTR_RANK_SIZE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string HCOM_ATTR_REDUCTION; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string HCOM_ATTR_GROUP; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string HCOM_ATTR_SR_TAG; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string HCOM_ATTR_SRC_RANK; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string HCOM_ATTR_DEST_RANK; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string HCOM_ATTR_FUSION; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string HCOM_ATTR_SHAPE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string HCOM_ATTR_DATA_TYPE; +// Log time stamp +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string LOG_TIME_STAMP_LOGID; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string LOG_TIME_STAMP_NOTIFY; +// SpaceToDepth/DepthToSpace +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_BLOCK_SIZE; + +// SparseSoftmaxCrossEntropyWithLogits +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SPARSE_SOFT_MAX_ATTR_TLABLES; + +// MaxPoolGradWithArgmax +extern 
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string MAX_POOL_GRAD_OUTPUT_SHAPE; + +// AvgPoolGrad +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string AVG_POOL_GRAD_OUTPUT_SHAPE; + +// Pad +extern const std::string ATTR_PAD_FORMAT; + +// Variable +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_FORMAT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_NAME; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_FRACTALZ_FORMAT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_4D_FORMAT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_5D_FORMAT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_DATA_TYPE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_VAR_IN_NAME; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_VAR_IN_INDEX; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_VAR_OUT_INDEX; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_SHAPE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string HALF_VAR_NAME_END; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_CONTAINER; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_SHARED_NAME; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_DTYPE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_ADDR_OFFSET; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_VAR_IN_INDEX_KEY; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_VAR_OUT_INDEX_KEY; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_SRC_VAR_NAME; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_VAR_IS_SAVE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_VAR_IS_RESTORE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string VAR_ATTR_VAR_IS_BROADCAST; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string REF_VAR_SRC_VAR_NAME; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string REF_VAR_PRE_PEER_OUT_INDEX; + +// Assign +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ASSIGN_VALIDATE_SHAPE; + +// ShapeN +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SHAPEN_ATTR_N; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SHAPEN_ATTR_IN_TYPE; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SHAPEN_ATTR_OUT_TYPE; + +// Space2batch batch2space +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string BATCH_SPACE_ATTR_BLOCK; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string BATCH_SPACE_ATTR_PADDING; +// Depth_to_space space_to_depth +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string DEPTH_SPACE_ATTR_BLOCK_SIZE; +// FakeQuantWithMinMaxVars +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FakeQuantWithMinMaxVars_ATTR_MAX; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string FakeQuantWithMinMaxVars_ATTR_MIN;
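// Usage sketch for the attribute-key constants in this header; illustrative only. Each string names an
// attribute stored on an OpDesc, with ge::AttrUtils as the usual accessor. The node, op_desc and the
// attribute payload below are assumptions; only the key (domi::VAR_ATTR_NAME) comes from this file.
//   ge::OpDescPtr op_desc = node->GetOpDesc();
//   (void)ge::AttrUtils::SetStr(op_desc, domi::VAR_ATTR_NAME, "conv1_weights");
//   std::string var_name;
//   if (ge::AttrUtils::GetStr(op_desc, domi::VAR_ATTR_NAME, var_name)) {
//     GELOGI("Variable attribute: %s", var_name.c_str());
//   }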
+// Mobilenet_ssd_conv_fusion +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_BOXPREDICTOR_BOXES_FUSION; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_BOXPREDICTOR_SCORES_FUSION; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string SSD_BOXPREDICTOR_FUSION_BOX_TYPE_NUM; + +// Lsh project +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string LSH_PROJ_TYPE; + +// Control flow +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_ITERATORS_PER_LOOP; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_TRUE_BRANCH_STREAM; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_FLOW_CTRL_NODE_FLAG; + +// GatherV2 attr def +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string GATHERV2_ATTR_NAME_TAXIS; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string GATHERV2_ATTR_NAME_TINDICES; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string GATHERV2_ATTR_NAME_TPARAMS; + +// Reshape attr def +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESHAPE_ATTR_NAME_INPUT_DESC; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string RESHAPE_ATTR_NAME_OUTPUT_DESC; + +// Axis attr def +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_AXIS_ORG_OP; + +// The node link with SparseSoftmaxCrossEntropyWithLogits +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_LINK_WITH_SPARE; + +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_NET_OUTPUT_FORMAT; +extern FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string ATTR_NAME_NET_OUTPUT_DATATYPE; +} // namespace domi + +#endif // INC_FRAMEWORK_COMMON_OP_ATTR_DEFINE_H_ diff --git a/inc/framework/common/op/attr_value_util.h b/inc/framework/common/op/attr_value_util.h index 204dd648..ad13d75c 100644 --- a/inc/framework/common/op/attr_value_util.h +++ b/inc/framework/common/op/attr_value_util.h @@ -17,19 +17,17 @@ #ifndef INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ #define INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ -#include -#include #include - -#include "common/types.h" +#include #include "graph/debug/ge_attr_define.h" +#include "common/types.h" #include "proto/om.pb.h" using domi::AttrDef; -using domi::OpDef; using domi::AttrDef_ListValue; using domi::ModelDef; using domi::NamedAttrs; +using domi::OpDef; namespace ge { using AttrDefMap = ::google::protobuf::Map<::std::string, ::domi::AttrDef>; diff --git a/inc/framework/common/op/ge_op_utils.h b/inc/framework/common/op/ge_op_utils.h index 9c5e1667..94f98c5e 100644 --- a/inc/framework/common/op/ge_op_utils.h +++ b/inc/framework/common/op/ge_op_utils.h @@ -172,7 +172,7 @@ class OpUtils { /// /// @ingroup domi_omg - /// @brief Convert the convolution‘s weight data from [h, w, c, k] to [k, c, h, w] + /// @brief Convert the convolution's weight data from [h, w, c, k] to [k, c, h, w] /// @param [in] input Weight data in HWCK format /// @param [in] H value of H dimension /// @param [in] W value of W dimension @@ -183,7 +183,7 @@ class OpUtils { static void TransDataHWCK2KCHW(const void *input, int64_t H, int64_t W, int64_t C, int64_t K, void **output);
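// Index arithmetic behind TransDataHWCK2KCHW, sketched for reference: the element at [h, w, c, k] in the
// source lands at [k, c, h, w] in the destination. The float element type and the output allocation are
// assumptions for illustration; the declaration above leaves both to the implementation.
//   const float *src = static_cast<const float *>(input);
//   float *dst = new float[H * W * C * K];
//   for (int64_t h = 0; h < H; ++h)
//     for (int64_t w = 0; w < W; ++w)
//       for (int64_t c = 0; c < C; ++c)
//         for (int64_t k = 0; k < K; ++k)
//           dst[((k * C + c) * H + h) * W + w] = src[((h * W + w) * C + c) * K + k];
//   *output = dst;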
+ /// @brief Converts the convolutions weight data from [k, c, h, w] to [h, w, c, k]. /// @param [in] input Weight data in HWCK format /// @param [in] K value of K dimension /// @param [in] C value of C dimension @@ -198,7 +198,7 @@ class OpUtils { /// training network /// @param [in] model_tensor input and output tensor information /// @param [out] cc_tensor Tensor in CCE format after conversion - //// + /// static Status InitFilterTensorDescriptor(const ge::GeTensorDesc &model_tensor, ccFilterDescriptor_t &cc_tensor); static void SetTensorDescriptorAllOffsetQuantizeInfo(const GeTensorDesc &tensor, ccTensorDescriptor_t cc_tensor); diff --git a/inc/framework/common/op/op_parser_util.h b/inc/framework/common/op/op_parser_util.h index 714491ee..e64ddc92 100644 --- a/inc/framework/common/op/op_parser_util.h +++ b/inc/framework/common/op/op_parser_util.h @@ -23,7 +23,7 @@ #include namespace domi { -// General +// general const float DEFAULT_ALPHA_VALUE = 1.0; const float DEFAULT_BETA_VALUE = 0.0; const uint32_t NORMAL_INPUT_NUM = 1; @@ -37,7 +37,7 @@ const int NORMAL_DEVICE_DATA_TYPE = static_cast(cce::CC_DATA_HALF); const int DEFAULT_POOLING_MODE = static_cast(cce::CC_POOLING_MAX); const uint32_t DEFAULT_REAL_DIM_CNT = 4; -// Const +// const const uint32_t CONST_OP_INPUT_NUM = 0; const uint32_t CONST_OP_NORMAL_WEIGHT_SIZE = 1; @@ -56,7 +56,7 @@ const int32_t FUSEDBATCHNORMGRAD_WORKSPACE_NUM = 1; const int32_t FUSEDBATCHNORMGRAD_INPUT_NUM = 5; const int32_t FUSEDBATCHNORMGRAD_OUTPUT_NUM = 3; -// Conv +// conv const uint32_t CONVOLUTION_WORKSPACE_NUM = 1; const uint32_t CONVOLUTION_PAD_SIZE = 4; const uint32_t CONVOLUTION_STRIDE_SIZE = 2; @@ -104,7 +104,7 @@ const float LRN_DEFAULT_BETA = 0.75; /// /// @ingroup domi_common -/// @brief default value of roipooling +/// @brief roipooling default value /// const uint32_t ROIPOOLING_DEFAULT_POOLED_H = 0; const uint32_t ROIPOOLING_DEFAULT_POOLED_W = 0; @@ -115,7 +115,7 @@ const int32_t ROIPOOLING_DEFAULT_SAMPLING_RATIO = -1; const int32_t DETECTIONOUTPUT_INPUT_SIZE = 3; const int32_t DETECTIONOUTPUT_OUTPUT_SIZE = 2; const int32_t DETECTIONOUTPUT_WORKSPACE_NUM = 1; -const int DETECTIONOUTPUT_CLASS_NUM = 20; +const int DETECTIONOUTPUT_CLASS_NUM = 20; // Number of background categories const int DETECTIONOUTPUT_NUM_CLASSES_DEFAULT_VALUE = 21; const float DETECTIONOUTPUT_NMS_THRESHOLD_DEFAULT_VALUE = 0.3; const float DETECTIONOUTPUT_CONFIDENCE_THRESHOLD_DEFAULT_VALUE = 0.8; @@ -392,9 +392,9 @@ const uint32_t ATTENTION_DECODER_WEIGHT_CELL1_CANDIDATE_BIAS = 14; const uint32_t ATTENTION_DECODER_WEIGHT_EMBEDDING = 15; const uint32_t ATTENTION_DECODER_WEIGHT_ATTENVA = 16; const uint32_t ATTENTION_DECODER_WEIGHT_DECODER_INITIAL = 17; - // Attention decoder weight size const uint32_t ATTENTION_DECODER_WEIGHT_SIZE = 18; + const uint32_t ATTENTION_DECODER_INPUT_SIZE = 2; const uint32_t ATTENTION_DECODER_WORKSPACE_NUM = 1; const uint32_t ATTENTION_DECODER_INPUT_DECODER_INPUTS = 0; diff --git a/inc/framework/common/scope_guard.h b/inc/framework/common/scope_guard.h index 39214a43..79d71311 100644 --- a/inc/framework/common/scope_guard.h +++ b/inc/framework/common/scope_guard.h @@ -24,7 +24,7 @@ /// Acquire Resource 1 /// MAKE_GUARD([&] { Release Resource 1 }) /// Acquire Resource 2 -/// MAKE_GUARD([&] { Release Resource 2 }) +// MAKE_GUARD([&] { Release Resource 2 }) #define GE_MAKE_GUARD(var, callback) ge::ScopeGuard make_guard_##var(callback) #define GE_DISMISS_GUARD(var) make_guard_##var.Dismiss() diff --git a/inc/framework/common/types.h 
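The usage comment in scope_guard.h above is easiest to read as a worked example. A hedged sketch of the acquire/guard pairing; the three resource helpers are invented declarations, not GE APIs:

#include "framework/common/scope_guard.h"

void *AcquireBuffer();           // illustrative declarations only
void ReleaseBuffer(void *buf);
bool Process(void *buf);

bool LoadWithCleanup() {
  void *buffer = AcquireBuffer();
  // The guard runs its callback on every exit path unless dismissed first.
  GE_MAKE_GUARD(release, [&] { ReleaseBuffer(buffer); });
  if (!Process(buffer)) {
    return false;                // early return: the guard still releases buffer
  }
  GE_DISMISS_GUARD(release);     // success path: ownership passes to the caller
  return true;
}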
b/inc/framework/common/types.h old mode 100755 new mode 100644 index bf1812c8..10ddc473 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -20,7 +20,6 @@ #include #include #include - #include #include #include @@ -49,7 +48,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_S FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_LAYER; FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMP_FILE_PATH; -// public property names which are supported +// Supported public properties name FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROP_OME_START_TIME; // Start time FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROP_OME_DUMP_PATH; // Dump path FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROP_OME_LOG_PATH; // Log path @@ -1033,11 +1032,14 @@ struct BasicInfo { uint32_t workspace_size; // workspace uint32_t total_size; // total memory size }; + #pragma pack() // Cancels single-byte alignment } // namespace ge namespace domi { + /// @brief Data structure definition related to task sinking +/// Build model enum BuildMode { GEN_TASK_WITHOUT_L2FUSION = 3, // Carrying task data (L2 convergence function disabled) GEN_TASK_WITHOUT_FUSION = 4, // Carrying task data (all convergence functions disabled) diff --git a/inc/framework/common/util.h b/inc/framework/common/util.h index c77872ed..d7a1822c 100644 --- a/inc/framework/common/util.h +++ b/inc/framework/common/util.h @@ -65,117 +65,160 @@ if (var) GE_CHK_CCE(ccDestroyFilterDescriptor(&var)); \ }); +// For propagating errors when calling a function. +#define GE_RETURN_IF_ERROR(expr) \ + do { \ + const ::ge::Status _status = (expr); \ + if (_status) return _status; \ + } while (0) + #define GE_RETURN_WITH_LOG_IF_ERROR(expr, ...) \ do { \ const ::ge::Status _status = (expr); \ if (_status) { \ - GELOGE(ge::FAILED, __VA_ARGS__); \ + GE_LOGE(__VA_ARGS__); \ return _status; \ } \ } while (0) +// check whether the parameter is true. If it is, return FAILED and record the error log +#define GE_RETURN_WITH_LOG_IF_TRUE(condition, ...) \ + do { \ + if (condition) { \ + GE_LOGE(__VA_ARGS__); \ + return ge::FAILED; \ + } \ + } while (0) + // Check if the parameter is false. If yes, return FAILED and record the error log #define GE_RETURN_WITH_LOG_IF_FALSE(condition, ...) \ do { \ bool _condition = (condition); \ if (!_condition) { \ - GELOGE(ge::FAILED, __VA_ARGS__); \ + GE_LOGE(__VA_ARGS__); \ return ge::FAILED; \ } \ } while (0) +// Checks whether the parameter is true. If so, returns PARAM_INVALID and records the error log +#define GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(condition, ...) \ + do { \ + if (condition) { \ + GE_LOGE(__VA_ARGS__); \ + return ge::PARAM_INVALID; \ + } \ + } while (0) + +// Check if the parameter is false. If yes, return PARAM_INVALID and record the error log +#define GE_RT_PARAM_INVALID_WITH_LOG_IF_FALSE(condition, ...) \ + do { \ + bool _condition = (condition); \ + if (!_condition) { \ + GE_LOGE(__VA_ARGS__); \ + return ge::PARAM_INVALID; \ + } \ + } while (0) + // Check if the parameter is null. 
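GE_RETURN_IF_ERROR and its logging variant above are early-return wrappers that keep multi-step routines flat instead of nested in if-blocks. A minimal sketch; the two step functions are invented for illustration:

#include "framework/common/util.h"

ge::Status ParseStep();   // illustrative helpers, not GE APIs
ge::Status BuildStep();

ge::Status RunPipeline() {
  GE_RETURN_IF_ERROR(ParseStep());                                // propagate the status unchanged
  GE_RETURN_WITH_LOG_IF_ERROR(BuildStep(), "build step failed");  // propagate and log
  return ge::SUCCESS;
}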
If yes, return PARAM_INVALID and record the error -#define GE_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - GELOGE(ge::PARAM_INVALID, "param[#val] must not be null."); \ - return ge::PARAM_INVALID; \ - } \ +#define GE_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + GE_LOGE(param[#val] must not be null.); \ + return ge::PARAM_INVALID; \ + } \ } while (0) // Check if the parameter is null. If yes, return PARAM_INVALID and record the error -#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ - do { \ - if (val == nullptr) { \ - GELOGE(ge::PARAM_INVALID, "param[#val] must not be null."); \ - return; \ - } \ +#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ + do { \ + if (val == nullptr) { \ + GE_LOGE(param[#val] must not be null.); \ + return; \ + } \ } while (0) // Check whether the parameter is null. If so, execute the exec_expr expression and record the error log -#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ - do { \ - if (val == nullptr) { \ - GELOGE(ge::PARAM_INVALID, "param[#val] must not be null."); \ - exec_expr; \ - } \ +#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ + do { \ + if (val == nullptr) { \ + GE_LOGE(param[#val] must not be null.); \ + exec_expr; \ + } \ } while (0) // Check whether the parameter is null. If yes, return directly and record the error log -#define GE_RT_VOID_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - GELOGE(ge::PARAM_INVALID, "param[#val] must not be null."); \ - return; \ - } \ +#define GE_RT_VOID_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + GE_LOGE(param[#val] must not be null.); \ + return; \ + } \ } while (0) // Check if the parameter is null. If yes, return false and record the error log -#define GE_RT_FALSE_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - GELOGE(ge::FAILED, "param[#val] must not be null."); \ - return false; \ - } \ +#define GE_RT_FALSE_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + GE_LOGE(param[#val] must not be null.); \ + return false; \ + } \ } while (0) // Check if the parameter is out of bounds -#define GE_CHECK_SIZE(size) \ - do { \ - if (size == 0) { \ - GELOGE(ge::PARAM_INVALID, "param[#size] is out of range"); \ - return ge::PARAM_INVALID; \ - } \ +#define GE_CHECK_SIZE(size) \ + do { \ + if (size == 0) { \ + GE_LOGE(param[#size] is out of range); \ + return ge::PARAM_INVALID; \ + } \ } while (0) // Macros that define the size variable -#define GE_DEFINE_BYTE_SIZE(_var_name, _expr, _sizeof) \ - uint32_t _var_name; \ - do { \ - uint32_t _expr_size = (_expr); \ - uint32_t _sizeof_size = (_sizeof); \ - if (_expr_size > (0xffffffff) / _sizeof_size) { \ - GELOGE(ge::PARAM_INVALID, "byte_size: [#_var_name] is out of range"); \ - return ge::PARAM_INVALID; \ - } \ - _var_name = _sizeof_size * _expr_size; \ +#define GE_DEFINE_BYTE_SIZE(_var_name, _expr, _sizeof) \ + uint32_t _var_name; \ + do { \ + uint32_t _expr_size = (_expr); \ + uint32_t _sizeof_size = (_sizeof); \ + if (_expr_size > (0xffffffff) / _sizeof_size) { \ + GE_LOGE(byte size : #_var_name is out of range); \ + return ge::PARAM_INVALID; \ + } \ + _var_name = _sizeof_size * _expr_size; \ } while (0); // Check if the container is empty -#define GE_CHECK_VECTOR_NOT_EMPTY(vector) \ - do { \ - if (vector.empty()) { \ - GELOGE(ge::FAILED, "param[#vector] is empty !"); \ - return ge::FAILED; \ - } \ +#define GE_CHECK_VECTOR_NOT_EMPTY(vector) \ + do { \ + if (vector.empty()) { \ + GE_LOGE(param[#vector] is empty !); \ + return ge::FAILED; \ + } \ + } while (0) + +#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ + do { \ + 
if (size <= 0) { \ + GE_LOGE(param[#size] is not a positive number); \ + return ge::PARAM_INVALID; \ + } \ } while (0) // Check if the value on the left is greater than or equal to the value on the right -#define GE_CHECK_GE(lhs, rhs) \ - do { \ - if (lhs < rhs) { \ - GELOGE(ge::PARAM_INVALID, "param[#lhs] is less than[#rhs]"); \ - return ge::PARAM_INVALID; \ - } \ +#define GE_CHECK_GE(lhs, rhs) \ + do { \ + if (lhs < rhs) { \ + GE_LOGE(param[#lhs] is less than[#rhs]); \ + return ge::PARAM_INVALID; \ + } \ } while (0) // Check if the value on the left is less than or equal to the value on the right -#define GE_CHECK_LE(lhs, rhs) \ - do { \ - if (lhs > rhs) { \ - GELOGE(ge::PARAM_INVALID, "param[#lhs] is greater than[#rhs]"); \ - return ge::PARAM_INVALID; \ - } \ +#define GE_CHECK_LE(lhs, rhs) \ + do { \ + if (lhs > rhs) { \ + GE_LOGE(param[#lhs] is greater than[#rhs]); \ + return ge::PARAM_INVALID; \ + } \ } while (0) #define GE_DELETE_NEW_SINGLE(var) \ @@ -194,13 +237,15 @@ } \ }; -/// -/// @ingroup domi_common -/// @brief version of om.proto file -/// +/** + * @ingroup domi_common + * @brief version of om.proto file + */ static constexpr int32_t OM_PROTO_VERSION = 2; -// Finding an Integer Ceiling Value Without Precision Loss +/** + * Finding an Integer Ceiling Value Without Precision Loss + */ #define CEIL(N, n) (((N) + (n)-1) / (n)) namespace ge { diff --git a/inc/framework/dlog/log.h b/inc/framework/dlog/log.h index ab040560..899a98b1 100644 --- a/inc/framework/dlog/log.h +++ b/inc/framework/dlog/log.h @@ -21,7 +21,7 @@ #if !defined(__ANDROID__) && !defined(ANDROID) #include "toolchain/slog.h" #else -#include +#include #endif #ifdef _MSC_VER @@ -31,16 +31,11 @@ #endif #if !defined(__ANDROID__) && !defined(ANDROID) -#define DAV_LOGI(MOD_NAME, fmt, ...) \ - dlog_info(static_cast(GE), "%s:" #fmt, __FUNCTION__, ##__VA_ARGS__) -#define DAV_LOGW(MOD_NAME, fmt, ...) \ - dlog_warn(static_cast(GE), "%s:" #fmt, __FUNCTION__, ##__VA_ARGS__) -#define DAV_LOGE(MOD_NAME, fmt, ...) \ - dlog_error(static_cast(GE), "%s:" #fmt, __FUNCTION__, ##__VA_ARGS__) -#define DAV_LOGD(MOD_NAME, fmt, ...) \ - dlog_debug(static_cast(GE), "%s:" #fmt, __FUNCTION__, ##__VA_ARGS__) -#define DAV_EVENT(MOD_NAME, fmt, ...) \ - dlog_event(static_cast(GE), "%s:" #fmt, __FUNCTION__, ##__VA_ARGS__) +#define DAV_LOGI(MOD_NAME, fmt, ...) dlog_info(static_cast(GE), "%s:" #fmt, __FUNCTION__, ##__VA_ARGS__) +#define DAV_LOGW(MOD_NAME, fmt, ...) dlog_warn(static_cast(GE), "%s:" #fmt, __FUNCTION__, ##__VA_ARGS__) +#define DAV_LOGE(MOD_NAME, fmt, ...) dlog_error(static_cast(GE), "%s:" #fmt, __FUNCTION__, ##__VA_ARGS__) +#define DAV_LOGD(MOD_NAME, fmt, ...) dlog_debug(static_cast(GE), "%s:" #fmt, __FUNCTION__, ##__VA_ARGS__) +#define DAV_EVENT(MOD_NAME, fmt, ...) dlog_event(static_cast(GE), "%s:" #fmt, __FUNCTION__, ##__VA_ARGS__) #else #define DAV_LOGI(MOD_NAME, fmt, ...) 
\ __android_log_print(ANDROID_LOG_INFO, MOD_NAME, "%s %s(%d)::" #fmt, __FILE__, __FUNCTION__, __LINE__, ##__VA_ARGS__)
diff --git a/inc/framework/ge_runtime/task_info.h b/inc/framework/ge_runtime/task_info.h
index 271df4a0..567a2964 100644
--- a/inc/framework/ge_runtime/task_info.h
+++ b/inc/framework/ge_runtime/task_info.h
@@ -28,23 +28,23 @@ namespace ge { namespace model_runner { enum TaskInfoType {
- kCce = 0,
- kTbe,
- kAiCpu,
- kLabelSet,
- kLabelSwitch,
- kLabelGoto,
- kEventRecord,
- kEventWait,
- kFusionStart,
- kFusionEnd,
- kHccl,
- kProfilerTrace,
- kMemcpyAsync,
- kStreamSwitch,
- kStreamActive,
+ CCE = 0,
+ TBE,
+ AICPU,
+ LABEL_SET,
+ LABEL_SWITCH,
+ LABEL_GOTO,
+ EVENT_RECORD,
+ EVENT_WAIT,
+ FUSION_START,
+ FUSION_END,
+ HCCL,
+ PROFILER_TRACE,
+ MEMCPY_ASYNC,
+ STREAM_SWITCH,
+ STREAM_ACTIVE,
// Insert new task type here
- kReserved = 23
+ RESERVED = 23
}; class TaskInfo {
@@ -66,7 +66,7 @@ class CceTaskInfo : public TaskInfo { CceTaskInfo(uint32_t stream_id, const cce::ccOpContext &ctx, const std::string &stub_func, uint32_t block_dim, const std::vector &args, uint32_t args_size, const std::vector &sm_desc, const std::vector &flow_table, const std::vector &args_offset, bool is_flowtable)
- : TaskInfo(stream_id, TaskInfoType::kCce),
+ : TaskInfo(stream_id, TaskInfoType::CCE),
ctx_(ctx), stub_func_(stub_func), block_dim_(block_dim),
@@ -106,7 +106,7 @@ class TbeTaskInfo : public TaskInfo { uint32_t args_size, const std::vector &sm_desc, void *binary, uint32_t binary_size, const std::vector &meta_data, const std::vector &input_data_addrs, const std::vector &output_data_addrs, const std::vector &workspace_addrs)
- : TaskInfo(stream_id, TaskInfoType::kTbe),
+ : TaskInfo(stream_id, TaskInfoType::TBE),
stub_func_(stub_func), block_dim_(block_dim), args_(args),
@@ -155,7 +155,7 @@ class AicpuTaskInfo : public TaskInfo { public: AicpuTaskInfo(uint32_t stream_id, const string &so_name, const std::string &kernel_name, const std::string &node_def, const std::vector &input_data_addrs, const std::vector &output_data_addrs)
- : TaskInfo(stream_id, TaskInfoType::kAiCpu),
+ : TaskInfo(stream_id, TaskInfoType::AICPU),
so_name_(so_name), kernel_name_(kernel_name), node_def_(node_def),
@@ -192,21 +192,21 @@ class LabelTaskInfo : public TaskInfo { class LabelSetTaskInfo : public LabelTaskInfo { public: LabelSetTaskInfo(uint32_t stream_id, uint32_t label_id)
- : LabelTaskInfo(stream_id, TaskInfoType::kLabelSet, label_id) {}
+ : LabelTaskInfo(stream_id, TaskInfoType::LABEL_SET, label_id) {}
~LabelSetTaskInfo() override {} }; class LabelSwitchTaskInfo : public LabelTaskInfo { public: LabelSwitchTaskInfo(uint32_t stream_id, uint32_t label_id)
- : LabelTaskInfo(stream_id, TaskInfoType::kLabelSwitch, label_id) {}
+ : LabelTaskInfo(stream_id, TaskInfoType::LABEL_SWITCH, label_id) {}
~LabelSwitchTaskInfo() override {} }; class LabelGotoTaskInfo : public LabelTaskInfo { public: LabelGotoTaskInfo(uint32_t stream_id, uint32_t label_id)
- : LabelTaskInfo(stream_id, TaskInfoType::kLabelGoto, label_id) {}
+ : LabelTaskInfo(stream_id, TaskInfoType::LABEL_GOTO, label_id) {}
~LabelGotoTaskInfo() override {} };
@@ -225,26 +225,26 @@ class EventTaskInfo : public TaskInfo { class EventRecordTaskInfo : public EventTaskInfo { public: EventRecordTaskInfo(uint32_t stream_id, uint32_t event_id)
- : EventTaskInfo(stream_id, TaskInfoType::kEventRecord, event_id) {}
+ : EventTaskInfo(stream_id, TaskInfoType::EVENT_RECORD, event_id) {}
~EventRecordTaskInfo() override {} }; class EventWaitTaskInfo : public EventTaskInfo
{ public: EventWaitTaskInfo(uint32_t stream_id, uint32_t event_id) - : EventTaskInfo(stream_id, TaskInfoType::kEventWait, event_id) {} + : EventTaskInfo(stream_id, TaskInfoType::EVENT_WAIT, event_id) {} ~EventWaitTaskInfo() override {} }; class FusionStartTaskInfo : public TaskInfo { public: - explicit FusionStartTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::kFusionStart) {} + FusionStartTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::FUSION_START) {} ~FusionStartTaskInfo() override {} }; class FusionEndTaskInfo : public TaskInfo { public: - explicit FusionEndTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::kFusionEnd) {} + FusionEndTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::FUSION_END) {} ~FusionEndTaskInfo() override {} }; @@ -256,7 +256,7 @@ class HcclTaskInfo : public TaskInfo { int64_t op_type, int64_t data_type, std::function hcom_bind_model, std::function hcom_unbind_model, std::function, void *)> hcom_distribute_task) - : TaskInfo(stream_id, TaskInfoType::kHccl), + : TaskInfo(stream_id, TaskInfoType::HCCL), hccl_type_(hccl_type), input_data_addr_(input_data_addr), output_data_addr_(output_data_addr), @@ -313,7 +313,7 @@ class HcclTaskInfo : public TaskInfo { class ProfilerTraceTaskInfo : public TaskInfo { public: ProfilerTraceTaskInfo(uint32_t stream_id, uint64_t log_id, bool notify, uint32_t flat) - : TaskInfo(stream_id, TaskInfoType::kProfilerTrace), log_id_(log_id), notify_(notify), flat_(flat) {} + : TaskInfo(stream_id, TaskInfoType::PROFILER_TRACE), log_id_(log_id), notify_(notify), flat_(flat) {} ~ProfilerTraceTaskInfo() override {} uint64_t log_id() const { return log_id_; } @@ -329,7 +329,7 @@ class ProfilerTraceTaskInfo : public TaskInfo { class MemcpyAsyncTaskInfo : public TaskInfo { public: MemcpyAsyncTaskInfo(uint32_t stream_id, void *dst, uint64_t dst_max, void *src, uint64_t count, uint32_t kind) - : TaskInfo(stream_id, TaskInfoType::kMemcpyAsync), + : TaskInfo(stream_id, TaskInfoType::MEMCPY_ASYNC), dst_(dst), dst_max_(dst_max), src_(src), @@ -355,7 +355,7 @@ class StreamSwitchTaskInfo : public TaskInfo { public: StreamSwitchTaskInfo(uint32_t stream_id, int64_t true_stream_id, void *input_addr, void *value_addr, int64_t cond, int64_t data_type) - : TaskInfo(stream_id, TaskInfoType::kStreamSwitch), + : TaskInfo(stream_id, TaskInfoType::STREAM_SWITCH), true_stream_id_(true_stream_id), input_addr_(input_addr), value_addr_(value_addr), @@ -380,7 +380,7 @@ class StreamSwitchTaskInfo : public TaskInfo { class StreamActiveTaskInfo : public TaskInfo { public: StreamActiveTaskInfo(uint32_t stream_id, uint32_t active_stream_id) - : TaskInfo(stream_id, TaskInfoType::kStreamActive), active_stream_id_(active_stream_id) {} + : TaskInfo(stream_id, TaskInfoType::STREAM_ACTIVE), active_stream_id_(active_stream_id) {} ~StreamActiveTaskInfo() override {} uint32_t active_stream_id() const { return active_stream_id_; } diff --git a/inc/framework/generator/generator_api.h b/inc/framework/generator/generator_api.h index 71c6832e..39b4da8b 100644 --- a/inc/framework/generator/generator_api.h +++ b/inc/framework/generator/generator_api.h @@ -24,6 +24,7 @@ extern "C" { #endif typedef uint32_t Status_t; +using Status_t = uint32_t; typedef void *OpAttr_t; typedef void *OpTensor_t; diff --git a/inc/framework/memory/memory_assigner.h b/inc/framework/memory/memory_assigner.h index eb12367f..bbf0939b 100644 --- a/inc/framework/memory/memory_assigner.h +++ b/inc/framework/memory/memory_assigner.h @@ -39,4 +39,4 @@ class 
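With this rename, each subclass stamps its TaskInfoType into the base at construction, so the runtime can dispatch on the stored tag without RTTI. A short usage sketch, assuming the TaskInfo base exposes its stored type as the pattern implies; stream and label ids are made up:

#include <memory>
#include "framework/ge_runtime/task_info.h"

void BuildSampleTasks() {
  using namespace ge::model_runner;
  // Each constructor forwards the matching enum value to the TaskInfo base.
  auto label_set = std::make_shared<LabelSetTaskInfo>(/*stream_id=*/0, /*label_id=*/1);
  auto activate = std::make_shared<StreamActiveTaskInfo>(/*stream_id=*/0, /*active_stream_id=*/2);
  // type() is assumed to return the tag stored by the base class.
  bool is_label = (label_set->type() == TaskInfoType::LABEL_SET);
  (void)activate;
  (void)is_label;
}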
MemoryAssigner { ge::ComputeGraphPtr compute_graph_; }; } // namespace ge -#endif // INC_FRAMEWORK_MEMORY_MEMORY_ASSIGNER_H_ +#endif // INC_FRAMEWORK_MEMORY_MEMORY_ASSIGNER_H_ \ No newline at end of file diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index 70b1f04f..67834c77 100644 --- a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -24,7 +24,6 @@ #include #include #include - #include "framework/common/fmk_error_codes.h" #include "framework/common/types.h" #include "register/register_fmk_types.h" @@ -40,10 +39,10 @@ using std::unordered_map; using std::vector; namespace ge { -/// -/// @ingroup domi_omg -/// @brief run model -/// +/** + * @ingroup domi_omg + * @brief run model + */ enum RunMode { kGeOmModel = 0, // generate offline model file kModelToJson = 1, // convert to JSON file @@ -119,12 +118,20 @@ struct OmgContext { } // namespace ge namespace domi { -/// -/// @ingroup domi_omg -/// @brief get OMG context -/// @return OmgContext context -/// +/** + * @ingroup domi_omg + * @brief get OMG context + * @return OmgContext context + */ ge::OmgContext &GetContext(); + +struct TEBinInfo { + // It is obsolete. It will be automatically obtained from the binfilename field of the JSON file later. + // To be compatible with use cases written by previous users, fields are not deleted.(2018.11.21) + std::string bin_file_path; + std::string json_file_path; + std::string ddk_version; +}; } // namespace domi #endif // INC_FRAMEWORK_OMG_OMG_INNER_TYPES_H_ diff --git a/inc/framework/omg/version.h b/inc/framework/omg/version.h old mode 100755 new mode 100644 diff --git a/inc/graph/compute_graph.h b/inc/graph/compute_graph.h old mode 100755 new mode 100644 index 68980d56..5cf02dd2 --- a/inc/graph/compute_graph.h +++ b/inc/graph/compute_graph.h @@ -23,7 +23,7 @@ #include #include #include - +#include #include "detail/attributes_holder.h" #include "graph/anchor.h" #include "graph/node.h" diff --git a/inc/graph/debug/ge_attr_define.h b/inc/graph/debug/ge_attr_define.h index d3d0a122..e35c77f9 100644 --- a/inc/graph/debug/ge_attr_define.h +++ b/inc/graph/debug/ge_attr_define.h @@ -1,4 +1,4 @@ -/** +/** * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -149,7 +149,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MODEL_BATCH_NUM; - GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AUTOMIC_ADD_START; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_LABEL; @@ -468,9 +467,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string GENERATE GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string GENERATE_RPN_PROPOSAL_ATTR_POST_NMS_TOPK; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string GENERATE_RPN_PROPOSAL_ATTR_RPN_MINI_SIZE; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string - GENERATE_RPN_PROPOSAL_ATTR_RPN_PROPOSAL_NMS_THRESH; + GENERATE_RPN_PROPOSAL_ATTR_RPN_PROPOSAL_NMS_THRESH; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string - GENERATE_RPN_PROPOSAL_ATTR_RPN_PROPOSAL_FILTER_THRESH; + GENERATE_RPN_PROPOSAL_ATTR_RPN_PROPOSAL_FILTER_THRESH; // Decode_bbox GE_FUNC_DEV_VISIBILITY 
GE_FUNC_HOST_VISIBILITY extern const std::string DECODE_BBOX_ATTR_DECODECLIP; @@ -767,7 +766,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATT GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_SHAPE; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string HCOM_ATTR_DATA_TYPE; - GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_DATATYPE; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OUTPUT_DATATYPE; @@ -776,3 +774,4 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DYNAMIC_ } // namespace ge #endif // INC_GRAPH_DEBUG_GE_ATTR_DEFINE_H_ +/*lint +e618*/ diff --git a/inc/graph/ge_context.h b/inc/graph/ge_context.h index 9948705b..f35e09ec 100644 --- a/inc/graph/ge_context.h +++ b/inc/graph/ge_context.h @@ -27,15 +27,15 @@ class GEContext { graphStatus GetOption(const std::string &key, std::string &option); uint64_t SessionId(); uint32_t DeviceId(); - uint64_t JobId(); + uint64_t TraceId(); void Init(); void SetCtxDeviceId(uint32_t device_id); private: uint64_t session_id_ = 0; uint32_t device_id_ = 0; - uint64_t job_id_ = 0; -}; + uint64_t trace_id_ = 0; +}; // class GEContext /// Get context /// @return diff --git a/inc/graph/ge_global_options.h b/inc/graph/ge_global_options.h old mode 100755 new mode 100644 diff --git a/inc/graph/ge_local_context.h b/inc/graph/ge_local_context.h index 692c96e7..b87c10b7 100644 --- a/inc/graph/ge_local_context.h +++ b/inc/graph/ge_local_context.h @@ -23,20 +23,22 @@ #include "graph/ge_error_codes.h" -using std::string; using std::map; +using std::string; namespace ge { class GEThreadLocalContext { public: graphStatus GetOption(const string &key, string &option); - void SetGlobalOption(map options_map); + void SetGraphOption(map options_map); void SetSessionOption(map options_map); + void SetGlobalOption(map options_map); private: + map graph_options_; map session_options_; map global_options_; -}; +}; // class GEThreadLocalContext GEThreadLocalContext &GetThreadLocalContext(); } // namespace ge diff --git a/inc/graph/model.h b/inc/graph/model.h old mode 100755 new mode 100644 index f29410ea..8e33b119 --- a/inc/graph/model.h +++ b/inc/graph/model.h @@ -31,6 +31,8 @@ using std::map; using std::string; using std::vector; +/*lint -e148*/ + class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Model : public AttrHolder { public: Model(); @@ -65,7 +67,7 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Model : public AttrHolder { graphStatus Save(Buffer &buffer) const; graphStatus SaveToFile(const string &file_name) const; - // Model will be rewritten + // Model will be rewrite static graphStatus Load(const uint8_t *data, size_t len, Model &model); graphStatus Load(ge::proto::ModelDef &model_def); graphStatus LoadFromFile(const string &file_name); @@ -89,6 +91,7 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Model : public AttrHolder { std::string platform_version_{""}; Graph graph_; }; +/*lint +e148*/ } // namespace ge using ModelPtr = std::shared_ptr; diff --git a/inc/graph/node.h b/inc/graph/node.h index 66e38a43..2785b0b9 100644 --- a/inc/graph/node.h +++ b/inc/graph/node.h @@ -20,14 +20,14 @@ #include #include #include -#include #include #include - +#include #include "graph/ge_attr_value.h" +#include "utils/attr_utils.h" + #include "graph/op_desc.h" #include "graph/range_vistor.h" -#include "utils/attr_utils.h" namespace ge { class ComputeGraph; diff --git 
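SetGraphOption adds a third option layer in front of the session and global maps. GetOption's body is not part of this hunk, so the precedence is an assumption; a standalone analogue of the presumed graph > session > global lookup:

#include <map>
#include <string>

// Standalone analogue of GEThreadLocalContext::GetOption; the nearest scope wins.
bool GetOption(const std::map<std::string, std::string> &graph_opts,
               const std::map<std::string, std::string> &session_opts,
               const std::map<std::string, std::string> &global_opts,
               const std::string &key, std::string &option) {
  for (const auto *opts : {&graph_opts, &session_opts, &global_opts}) {
    auto it = opts->find(key);
    if (it != opts->end()) {
      option = it->second;
      return true;
    }
  }
  return false;  // key set in none of the three scopes
}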
a/inc/external/graph/usr_types.h b/inc/graph/usr_types.h similarity index 99% rename from inc/external/graph/usr_types.h rename to inc/graph/usr_types.h index 64c036bd..796a70a3 100644 --- a/inc/external/graph/usr_types.h +++ b/inc/graph/usr_types.h @@ -20,7 +20,6 @@ #include #include #include - namespace ge { #define USR_TYPE_DEC(type, name) \ inline void set_##name(const type &value) { name = value; } \ diff --git a/src/common/graph/anchor.cc b/src/common/graph/anchor.cc index d16c96e0..fbfd3033 100644 --- a/src/common/graph/anchor.cc +++ b/src/common/graph/anchor.cc @@ -15,10 +15,8 @@ */ #include "graph/anchor.h" - #include #include - #include "debug/ge_util.h" #include "framework/common/debug/ge_log.h" #include "graph/node.h" @@ -53,6 +51,7 @@ void Anchor::UnlinkAll() noexcept { if (Unlink(peer_anchor_ptr) != GRAPH_SUCCESS) { GELOGW("unlink peer_anchor_ptr failed."); } + } while (!peer_anchors_.empty()); } } @@ -70,10 +69,10 @@ graphStatus Anchor::Unlink(const AnchorPtr &peer) { GE_IF_BOOL_EXEC(it == peer_anchors_.end(), GELOGW("this anchor is not connected to peer"); return GRAPH_FAILED); auto it_peer = - std::find_if(peer->peer_anchors_.begin(), peer->peer_anchors_.end(), [this](const std::weak_ptr &an) { - auto anchor = an.lock(); - return Equal(anchor); - }); + std::find_if(peer->peer_anchors_.begin(), peer->peer_anchors_.end(), [this](const std::weak_ptr &an) { + auto anchor = an.lock(); + return Equal(anchor); + }); GE_CHK_BOOL_RET_STATUS(it_peer != peer->peer_anchors_.end(), GRAPH_FAILED, "peer is not connected to this anchor"); diff --git a/src/common/graph/attr_value.cc b/src/common/graph/attr_value.cc index a5457ecc..066767c2 100644 --- a/src/common/graph/attr_value.cc +++ b/src/common/graph/attr_value.cc @@ -15,7 +15,6 @@ */ #include "external/graph/attr_value.h" - #include "debug/ge_log.h" #include "debug/ge_util.h" #include "framework/common/debug/ge_log.h" diff --git a/src/common/graph/buffer.cc b/src/common/graph/buffer.cc index f6dc7a83..ba43377a 100644 --- a/src/common/graph/buffer.cc +++ b/src/common/graph/buffer.cc @@ -15,7 +15,6 @@ */ #include "graph/buffer.h" - #include "proto/ge_ir.pb.h" #include "framework/common/debug/ge_log.h" diff --git a/src/common/graph/compute_graph.cc b/src/common/graph/compute_graph.cc old mode 100755 new mode 100644 index d82e619f..b0382357 --- a/src/common/graph/compute_graph.cc +++ b/src/common/graph/compute_graph.cc @@ -15,7 +15,9 @@ */ #include "graph/compute_graph.h" + #include + #include "./format_refiner.h" #include "./ge_context.h" #include "debug/ge_attr_define.h" @@ -95,7 +97,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::FindNode(co } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ComputeGraph::GraphAttrsAreEqual( - const ComputeGraph &r_graph) const { + const ComputeGraph &r_graph) const { // ProtoMsgOwner <::google::protobuf::Message> is temporarily ignored if ((this->attrs_.protoMsg_ != nullptr) && (r_graph.attrs_.protoMsg_ != nullptr)) { const auto &proto_attr_map = *(this->attrs_.protoMsg_); @@ -122,7 +124,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ComputeGraph::GraphAttrsAreE /// Since there may be different input nodes /// chosen by user in the same graph, special judgment is needed GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ComputeGraph::VectorInputNodePtrIsEqual( - const std::vector &left_nodes, const std::vector &right_nodes) const { + const std::vector &left_nodes, const std::vector &right_nodes) const { const auto left_nodes_size = left_nodes.size(); const auto 
right_nodes_size = right_nodes.size(); if (left_nodes_size != right_nodes_size) { @@ -151,7 +153,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ComputeGraph::VectorInputNod } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ComputeGraph::GraphMembersAreEqual( - const ComputeGraph &r_graph) const { + const ComputeGraph &r_graph) const { return (IsEqual(this->sub_graph_.size(), r_graph.sub_graph_.size(), "graph.sub_graph_.size()") && IsEqual(this->nodes_.size(), r_graph.nodes_.size(), "graph.nodes_.size()") && VectorInputNodePtrIsEqual(this->input_nodes_, r_graph.input_nodes_) && @@ -472,14 +474,14 @@ graphStatus ComputeGraph::DFSTopologicalSorting(std::vector &node_vec, } } GE_IF_BOOL_EXEC( - node->GetOutControlAnchor() != nullptr, for (AnchorPtr peer_in_anchor - : node->GetOutControlAnchor()->GetPeerAnchors()) { - GE_CHECK_NOTNULL(peer_in_anchor); - auto iter = map_in_edge_num.find(peer_in_anchor->GetOwnerNode()); - if (iter != map_in_edge_num.end() && --iter->second == 0) { - stack.push_back(peer_in_anchor->GetOwnerNode()); - } - }) + node->GetOutControlAnchor() != nullptr, for (AnchorPtr peer_in_anchor + : node->GetOutControlAnchor()->GetPeerAnchors()) { + GE_CHECK_NOTNULL(peer_in_anchor); + auto iter = map_in_edge_num.find(peer_in_anchor->GetOwnerNode()); + if (iter != map_in_edge_num.end() && --iter->second == 0) { + stack.push_back(peer_in_anchor->GetOwnerNode()); + } + }) } return GRAPH_SUCCESS; @@ -521,28 +523,30 @@ graphStatus ComputeGraph::BFSTopologicalSorting(std::vector &node_vec, graphStatus ComputeGraph::CollectBreadthOutNode(const NodePtr &node, std::map &map_in_edge_num, std::map &breadth_node_map) { - for (const auto &anchor : node->GetAllOutDataAnchors()) { - for (const auto &peer_in_anchor : anchor->GetPeerInDataAnchors()) { - auto iter = map_in_edge_num.find(peer_in_anchor->GetOwnerNode()); - if (iter != map_in_edge_num.end() && --iter->second == 0) { - (void)breadth_node_map.emplace(peer_in_anchor->GetOwnerNode()->GetName(), peer_in_anchor->GetOwnerNode()); - } + for (const auto &anchor : node->GetAllOutDataAnchors()) { + for (const auto &peer_in_anchor : anchor->GetPeerInDataAnchors()) { + auto iter = map_in_edge_num.find(peer_in_anchor->GetOwnerNode()); + if (iter != map_in_edge_num.end() && 0 == --iter->second) { + (void)breadth_node_map.emplace(peer_in_anchor->GetOwnerNode()->GetName(), peer_in_anchor->GetOwnerNode()); } - for (const auto &peer_in_anchor : anchor->GetPeerInControlAnchors()) { - auto iter = map_in_edge_num.find(peer_in_anchor->GetOwnerNode()); - if (iter != map_in_edge_num.end() && --iter->second == 0) { - (void)breadth_node_map.emplace(peer_in_anchor->GetOwnerNode()->GetName(), peer_in_anchor->GetOwnerNode()); - } + } + + for (const auto &peer_in_anchor : anchor->GetPeerInControlAnchors()) { + auto iter = map_in_edge_num.find(peer_in_anchor->GetOwnerNode()); + if (iter != map_in_edge_num.end() && 0 == --iter->second) { + (void)breadth_node_map.emplace(peer_in_anchor->GetOwnerNode()->GetName(), peer_in_anchor->GetOwnerNode()); } } - GE_IF_BOOL_EXEC( - node->GetOutControlAnchor() != nullptr, for (AnchorPtr peer_in_anchor - : node->GetOutControlAnchor()->GetPeerAnchors()) { - auto iter = map_in_edge_num.find(peer_in_anchor->GetOwnerNode()); - if (iter != map_in_edge_num.end() && --iter->second == 0) { - (void)breadth_node_map.emplace(peer_in_anchor->GetOwnerNode()->GetName(), peer_in_anchor->GetOwnerNode()); - } - }) + } + + GE_IF_BOOL_EXEC( + node->GetOutControlAnchor() != nullptr, for (AnchorPtr peer_in_anchor + : 
node->GetOutControlAnchor()->GetPeerAnchors()) { + auto iter = map_in_edge_num.find(peer_in_anchor->GetOwnerNode()); + if (iter != map_in_edge_num.end() && 0 == --iter->second) { + (void)breadth_node_map.emplace(peer_in_anchor->GetOwnerNode()->GetName(), peer_in_anchor->GetOwnerNode()); + } + }) return GRAPH_SUCCESS; } @@ -636,7 +640,7 @@ graphStatus ComputeGraph::SortNodes(std::vector &stack, std::mapGetName()); @@ -721,7 +725,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY void ComputeGraph::Dump() const { } } GE_IF_BOOL_EXEC(node->GetOutControlAnchor() == nullptr, GELOGE(GRAPH_FAILED, "Out control anchor is null"); - return); + return ); for (const auto &peer_in_anchor : node->GetOutControlAnchor()->GetPeerInControlAnchors()) { GE_IF_BOOL_EXEC(peer_in_anchor != nullptr && peer_in_anchor->GetOwnerNode() != nullptr, GELOGI("node name = %s, out control node name = %s.", node->GetName().c_str(), @@ -745,7 +749,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::Isolate GE_CHK_BOOL_EXEC(GraphUtils::RemoveEdge(pre_out_data_anchor, in_data_anchor) == GRAPH_SUCCESS, return GRAPH_FAILED, "remove edge failed"); GE_IF_BOOL_EXEC(pre_out_data_anchor->GetOwnerNode()->GetType() == CONSTANT || - pre_out_data_anchor->GetOwnerNode()->GetType() == CONSTANTOP, + pre_out_data_anchor->GetOwnerNode()->GetType() == CONSTANTOP, continue); for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) { for (const auto &next_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { diff --git a/src/common/graph/debug/ge_op_types.h b/src/common/graph/debug/ge_op_types.h index 3905ed0e..d79eece4 100644 --- a/src/common/graph/debug/ge_op_types.h +++ b/src/common/graph/debug/ge_op_types.h @@ -16,7 +16,6 @@ #ifndef COMMON_GRAPH_DEBUG_GE_OP_TYPES_H_ #define COMMON_GRAPH_DEBUG_GE_OP_TYPES_H_ - #include #include #include @@ -26,7 +25,7 @@ #include namespace ge { -#define GE_REGISTER_OPTYPE(var_name, str_name) static const char *var_name __attribute__((unused)) = str_name +#define GE_REGISTER_OPTYPE(var_name, str_name) static const char* var_name __attribute__((unused)) = str_name GE_REGISTER_OPTYPE(DATA, "Data"); GE_REGISTER_OPTYPE(AIPPDATA, "AippData"); @@ -249,5 +248,5 @@ static const char* const kAippConvOpNmae = "aipp_conv_op"; /// @brief Operator configuration item separator /// static const char* const kOpConfDelimiter = ":"; -}; // namespace ge +}; // namespace ge #endif // COMMON_GRAPH_DEBUG_GE_OP_TYPES_H_ diff --git a/src/common/graph/debug/ge_util.h b/src/common/graph/debug/ge_util.h index 8a64014e..d982e44b 100644 --- a/src/common/graph/debug/ge_util.h +++ b/src/common/graph/debug/ge_util.h @@ -39,125 +39,126 @@ #endif #define GE_RETURN_IF_ERROR(expr) \ - do { \ - const ::ge::optStatus _status = (expr); \ - if (_status) return _status; \ + do { \ + const ::ge::optStatus _status = (expr); \ + if (_status) return _status; \ } while (0) #define GE_RETURN_WITH_LOG_IF_INFO(expr, ...) \ - do { \ - const ::ge::optStatus _status = (expr); \ - if (_status) { \ - GELOGI(__VA_ARGS__); \ - return _status; \ - } \ + do { \ + const ::ge::optStatus _status = (expr); \ + if (_status) { \ + GELOGI(__VA_ARGS__); \ + return _status; \ + } \ } while (0) // Verify whether the parameter is true. If yes, return graph failed and record the error log #define GE_RETURN_WITH_LOG_IF_TRUE(condition, ...) 
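The sorting code above is the classic in-degree scheme: seed a worklist with zero-in-degree nodes, pop one, and decrement each successor's count in map_in_edge_num until it reaches zero. The same algorithm in a dependency-free form:

#include <cstddef>
#include <vector>

// Kahn-style topological sort over an adjacency list; mirrors the
// map_in_edge_num bookkeeping used by the graph sorting above.
std::vector<int> TopoSort(const std::vector<std::vector<int>> &succ) {
  std::vector<int> in_deg(succ.size(), 0), order, stack;
  for (const auto &outs : succ)
    for (int v : outs) ++in_deg[v];
  for (size_t v = 0; v < succ.size(); ++v)
    if (in_deg[v] == 0) stack.push_back(static_cast<int>(v));
  while (!stack.empty()) {
    int u = stack.back();
    stack.pop_back();
    order.push_back(u);
    for (int v : succ[u])
      if (--in_deg[v] == 0) stack.push_back(v);  // successor becomes ready
  }
  return order;  // shorter than succ.size() iff the graph has a cycle
}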
\ - do { \ - if (condition) { \ - GELOGE(ge::GRAPH_FAILED, __VA_ARGS__); \ - return ge::GRAPH_FAILED; \ - } \ + do { \ + if (condition) { \ + GELOGE(ge::GRAPH_FAILED, __VA_ARGS__); \ + return ge::GRAPH_FAILED; \ + } \ } while (0) // Verify whether the parameter is false. If yes, return graph failed and record the error log #define GE_RETURN_WITH_LOG_IF_FALSE(condition, ...) \ - do { \ - bool _condition = (condition); \ - if (!_condition) { \ - GELOGE(ge::GRAPH_FAILED, __VA_ARGS__); \ - return ge::GRAPH_FAILED; \ - } \ + do { \ + bool _condition = (condition); \ + if (!_condition) { \ + GELOGE(ge::GRAPH_FAILED, __VA_ARGS__); \ + return ge::GRAPH_FAILED; \ + } \ } while (0) // Verify whether the parameter is true. If yes, return GRAPH_PARAM_INVALID and record the error log -#define GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(condition, ...) \ - do { \ - if (condition) { \ - GELOGE(ge::GRAPH_PARAM_INVALID, __VA_ARGS__); \ - return ge::GRAPH_PARAM_INVALID; \ - } \ +#define GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(condition, ...) \ + do { \ + if (condition) { \ + GELOGE(ge::GRAPH_PARAM_INVALID, __VA_ARGS__); \ + return ge::GRAPH_PARAM_INVALID; \ + } \ } while (0) // Verify whether the parameter is false. If yes, return GRAPH_PARAM_INVALID and record the error log #define GE_RT_PARAM_INVALID_WITH_LOG_IF_FALSE(condition, ...) \ - do { \ - bool _condition = (condition); \ - if (!_condition) { \ - GELOGE(ge::GRAPH_PARAM_INVALID, __VA_ARGS__); \ - return ge::GRAPH_PARAM_INVALID; \ - } \ + do { \ + bool _condition = (condition); \ + if (!_condition) { \ + GELOGE(ge::GRAPH_PARAM_INVALID, __VA_ARGS__); \ + return ge::GRAPH_PARAM_INVALID; \ + } \ } while (0) // Verify whether the parameter is null. If yes, return GRAPH_PARAM_INVALID and record the error log -#define GE_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] must not be null.", #val); \ - return ge::GRAPH_PARAM_INVALID; \ - } \ +#define GE_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] must not be null.", #val); \ + return ge::GRAPH_PARAM_INVALID; \ + } \ } while (0) // Verify whether the parameter is null. If yes, return GRAPH_PARAM_INVALID and record the error log -#define GE_CHECK_NOTNULL_EXEC(val, expr) \ - do { \ - if (val == nullptr) { \ - GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] must not be null.", #val); \ - expr; \ - } \ +#define GE_CHECK_NOTNULL_EXEC(val, expr) \ + do { \ + if (val == nullptr) { \ + GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] must not be null.", #val); \ + expr; \ + } \ } while (0) // Verify whether the parameter is null. 
If yes, return false and record the error log -#define GE_RT_FALSE_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - GELOGE(ge::GRAPH_FAILED, "param[%s] must not be null.", #val); \ - return false; \ - } \ +#define GE_RT_FALSE_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + GELOGE(ge::GRAPH_FAILED, "param[%s] must not be null.", #val); \ + return false; \ + } \ } while (0) // Check whether the parameter is out of range -#define GE_CHECK_SIZE(size) \ - do { \ - if (size == 0) { \ - GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] is out of range", #size); \ - return ge::GRAPH_PARAM_INVALID; \ - } \ +#define GE_CHECK_SIZE(size) \ + do { \ + if (size == 0) { \ + GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] is out of range", #size); \ + return ge::GRAPH_PARAM_INVALID; \ + } \ } while (0) /// /// @ingroup GE_common +/// eg:GE_DEFINE_BYTE_SIZE(filter_byte, filter.data().size(), sizeof(float)); /// -#define GE_DEFINE_BYTE_SIZE(_var_name, _expr, _sizeof) \ - uint32_t _var_name; \ - do { \ - uint32_t _expr_size = (_expr); \ - uint32_t _sizeof_size = (_sizeof); \ - if (_expr_size > (0xffffffff) / _sizeof_size) { \ - GELOGE(ge::GRAPH_PARAM_INVALID, "byte size : %s is out of range", #_var_name); \ - return ge::GRAPH_PARAM_INVALID; \ - } \ - _var_name = _sizeof_size * _expr_size; \ +#define GE_DEFINE_BYTE_SIZE(_var_name, _expr, _sizeof) \ + uint32_t _var_name; \ + do { \ + uint32_t _expr_size = (_expr); \ + uint32_t _sizeof_size = (_sizeof); \ + if (_expr_size > (0xffffffff) / _sizeof_size) { \ + GELOGE(ge::GRAPH_PARAM_INVALID, "byte size : %s is out of range", #_var_name); \ + return ge::GRAPH_PARAM_INVALID; \ + } \ + _var_name = _sizeof_size * _expr_size; \ } while (0); // Check whether the container is empty -#define GE_CHECK_VECTOR_NOT_EMPTY(vector) \ - do { \ - if (vector.empty()) { \ - GELOGE(ge::GRAPH_FAILED, "param[#vector] is empty", #vector); \ - return ge::GRAPH_FAILED; \ - } \ +#define GE_CHECK_VECTOR_NOT_EMPTY(vector) \ + do { \ + if (vector.empty()) { \ + GELOGE(ge::GRAPH_FAILED, "param[#vector] is empty", #vector); \ + return ge::GRAPH_FAILED; \ + } \ } while (0) // Check whether the container is empty and return the specified status code #define GE_CHECK_VECTOR_NOT_EMPTY_RET_STATUS(vector, _status) \ - do { \ - if (vector.empty()) { \ - GELOGE(_status, "param[%s] is empty", #vector); \ - return _status; \ - } \ + do { \ + if (vector.empty()) { \ + GELOGE(_status, "param[%s] is empty", #vector); \ + return _status; \ + } \ } while (0) /// @@ -166,102 +167,102 @@ /// It is usually placed under private /// #define GE_DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName &) = delete; \ + TypeName(const TypeName &) = delete; \ void operator=(const TypeName &) = delete /// Check whether the size is 0 or out of range /// @param:size:Size to be verified -#define GE_CHECK_SIZE_RANGE(size) \ - do { \ - if (size == 0 || size >= UINT_MAX / 4) { \ - GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] is out of range", #size); \ - return ge::GRAPH_PARAM_INVALID; \ - } \ +#define GE_CHECK_SIZE_RANGE(size) \ + do { \ + if (size == 0 || size >= UINT_MAX / 4) { \ + GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] is out of range", #size); \ + return ge::GRAPH_PARAM_INVALID; \ + } \ } while (0) -#define GE_CHECK_SHORT_SIZE_RANGE(size) \ - do { \ - if (size == 0 || size >= UINT_MAX / 2) { \ - GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] is out of range", #size); \ - return ge::GRAPH_PARAM_INVALID; \ - } \ +#define GE_CHECK_SHORT_SIZE_RANGE(size) \ + do { \ + if (size == 0 || size >= UINT_MAX / 2) { \ + 
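GE_DEFINE_BYTE_SIZE (see the eg: comment above) computes count * element_size in 32 bits only after proving the product cannot wrap, by comparing count against the divided limit first. The same guard as a plain function:

#include <cstdint>

// Returns false instead of letting count * elem_size wrap around uint32_t.
bool CheckedByteSize(uint32_t count, uint32_t elem_size, uint32_t &bytes) {
  if (elem_size == 0 || count > 0xffffffffU / elem_size) {
    return false;  // zero-sized element or 32-bit overflow
  }
  bytes = count * elem_size;
  return true;
}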
GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] is out of range", #size); \ + return ge::GRAPH_PARAM_INVALID; \ + } \ } while (0) -#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ - do { \ - if (size <= 0) { \ - GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] is not a positive number", #size); \ - return ge::GRAPH_PARAM_INVALID; \ - } \ +#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ + do { \ + if (size <= 0) { \ + GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] is not a positive number", #size); \ + return ge::GRAPH_PARAM_INVALID; \ + } \ } while (0) -#define GE_CHECK_POSITIVE_SHORT_SIZE_RANGE(size) \ - do { \ - if (size <= 0 || size == 0 || size >= UINT_MAX / 4) { \ - GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] is out of range", #size); \ - return ge::GRAPH_PARAM_INVALID; \ - } \ +#define GE_CHECK_POSITIVE_SHORT_SIZE_RANGE(size) \ + do { \ + if (size <= 0 || size == 0 || size >= UINT_MAX / 4) { \ + GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] is out of range", #size); \ + return ge::GRAPH_PARAM_INVALID; \ + } \ } while (0) // Verify that the value on the left is greater than or equal to the value on the right -#define GE_CHECK_GE(lhs, rhs) \ - do { \ - if (lhs < rhs) { \ - GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] is less than[%s]", #lhs, #rhs); \ - return ge::GRAPH_PARAM_INVALID; \ - } \ +#define GE_CHECK_GE(lhs, rhs) \ + do { \ + if (lhs < rhs) { \ + GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] is less than[%s]", #lhs, #rhs); \ + return ge::GRAPH_PARAM_INVALID; \ + } \ } while (0) // Check whether the parameters are equal -#define GE_CHECK_EQ(val1, val2) \ - do { \ - if (val1 != val2) { \ - GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] is not equals to[%s]", #val1, #val2); \ - return ge::GRAPH_PARAM_INVALID; \ - } \ +#define GE_CHECK_EQ(val1, val2) \ + do { \ + if (val1 != val2) { \ + GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] is not equals to[%s]", #val1, #val2); \ + return ge::GRAPH_PARAM_INVALID; \ + } \ } while (0) // Verify that the value on the left is less than or equal to the value on the right -#define GE_CHECK_LE(lhs, rhs) \ - do { \ - if (lhs > rhs) { \ - GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] is greater than[%s]", #lhs, #rhs); \ - return ge::GRAPH_PARAM_INVALID; \ - } \ +#define GE_CHECK_LE(lhs, rhs) \ + do { \ + if (lhs > rhs) { \ + GELOGE(ge::GRAPH_PARAM_INVALID, "param[%s] is greater than[%s]", #lhs, #rhs); \ + return ge::GRAPH_PARAM_INVALID; \ + } \ } while (0) // Check whether the parameters are equal -#define GE_CHECK_EQ_WITH_LOG(val1, val2, ...) \ - do { \ - if (val1 != val2) { \ - GELOGE(ge::GRAPH_PARAM_INVALID, __VA_ARGS__); \ - return ge::GRAPH_PARAM_INVALID; \ - } \ +#define GE_CHECK_EQ_WITH_LOG(val1, val2, ...) \ + do { \ + if (val1 != val2) { \ + GELOGE(ge::GRAPH_PARAM_INVALID, __VA_ARGS__); \ + return ge::GRAPH_PARAM_INVALID; \ + } \ } while (0) // If expr is false, the custom statement is executed #define CHECK_FALSE_EXEC(expr, exec_expr, ...) 
\ - do { \ - bool b = (expr); \ - if (!b) { \ - exec_expr; \ - } \ + do { \ + bool b = (expr); \ + if (!b) { \ + exec_expr; \ + } \ } while (0) #define GE_DELETE_NEW_SINGLE(var) \ - do { \ - if (var != nullptr) { \ - delete var; \ - var = nullptr; \ - } \ + do { \ + if (var != nullptr) { \ + delete var; \ + var = nullptr; \ + } \ } while (0) #define GE_DELETE_NEW_ARRAY(var) \ - do { \ - if (var != nullptr) { \ - delete[] var; \ - var = nullptr; \ - } \ + do { \ + if (var != nullptr) { \ + delete[] var; \ + var = nullptr; \ + } \ } while (0) template diff --git a/src/common/graph/debug/graph_debug.cc b/src/common/graph/debug/graph_debug.cc index 6cc5e0ca..930609b2 100644 --- a/src/common/graph/debug/graph_debug.cc +++ b/src/common/graph/debug/graph_debug.cc @@ -31,9 +31,9 @@ namespace ge { std::unordered_set control_anchor; std::vector types = { - "DT_FLOAT", "DT_FLOAT16", "DT_INT8", "DT_INT32", "DT_UINT8", "", - "DT_INT16", "DT_UINT16", "DT_UINT32", "DT_INT64", "DT_UINT64", "DT_DOUBLE", - "DT_BOOL", "DT_DUAL", "DT_DUAL_SUB_INT8", "DT_DUAL_SUB_UINT8", "DT_UNDEFINED"}; + "DT_FLOAT", "DT_FLOAT16", "DT_INT8", "DT_INT32", "DT_UINT8", "", + "DT_INT16", "DT_UINT16", "DT_UINT32", "DT_INT64", "DT_UINT64", "DT_DOUBLE", + "DT_BOOL", "DT_DUAL", "DT_DUAL_SUB_INT8", "DT_DUAL_SUB_UINT8", "DT_UNDEFINED"}; std::vector formats = {"FORMAT_NCHW", "FORMAT_NHWC", @@ -92,7 +92,7 @@ void GraphDebugPrinter::DumpNodeToDot(const NodePtr node, std::ostringstream &ou auto input_anchors = node->GetAllInDataAnchors(); auto op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL_EXEC(op_desc, return); + GE_CHECK_NOTNULL_EXEC(op_desc, return ); if (!input_anchors.empty()) { out_ << TAB << TAB << ""; } @@ -138,7 +138,7 @@ void GraphDebugPrinter::DumpEdgeToDot(const NodePtr node, std::ostringstream &ou } auto all_out_anchor = node->GetAllOutDataAnchors(); auto op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL_EXEC(op_desc, return); + GE_CHECK_NOTNULL_EXEC(op_desc, return ); for (const auto &anchor : all_out_anchor) { auto src_anchor = anchor; auto src_node_name = node->GetName(); @@ -170,12 +170,12 @@ void GraphDebugPrinter::DumpEdgeToDot(const NodePtr node, std::ostringstream &ou if (flag != DOT_NOT_SHOW_EDGE_LABEL && in_data_anchor) { string label; auto src_ops = src_anchor->GetOwnerNode()->GetOpDesc(); - GE_CHECK_NOTNULL_EXEC(src_ops, return); + GE_CHECK_NOTNULL_EXEC(src_ops, return ); auto src_shape = src_ops->GetOutputDesc(src_anchor->GetIdx()).GetShape(); auto dim = src_shape.GetDims(); std::ostringstream tensor_info; if (dim.size() > 0) { - for (unsigned int i = 0; i < dim.size(); i++) { + for (size_t i = 0; i < dim.size(); i++) { if (i != dim.size() - 1) { tensor_info << dim[i] << "x"; } else { @@ -186,7 +186,7 @@ void GraphDebugPrinter::DumpEdgeToDot(const NodePtr node, std::ostringstream &ou tensor_info << "?"; } auto src_tensor_desc = src_ops->GetOutputDescPtr(src_anchor->GetIdx()); - GE_CHECK_NOTNULL_EXEC(src_tensor_desc, return); + GE_CHECK_NOTNULL_EXEC(src_tensor_desc, return ); auto format = src_tensor_desc->GetFormat(); auto datatype = src_tensor_desc->GetDataType(); tensor_info << " : " << formats[format] << " : " << types[datatype]; diff --git a/src/common/graph/format_refiner.cc b/src/common/graph/format_refiner.cc index 1b95b500..3217b17a 100644 --- a/src/common/graph/format_refiner.cc +++ b/src/common/graph/format_refiner.cc @@ -67,6 +67,7 @@ graphStatus FormatRefiner::GetAnchorPoints(const ge::ComputeGraphPtr &graph, std anchor_points.clear(); // Get all anchor point nodes and switch nodes for (const auto 
&node_ptr : graph->GetAllNodes()) { + std::vector is_node_set_format; if (node_ptr == nullptr) { return GRAPH_FAILED; } @@ -166,7 +167,7 @@ graphStatus FormatRefiner::BackInferProcess(std::deque &nodes, ge:: if (ge_tensor_desc.GetOriginFormat() == FORMAT_ND) { auto dim_num = ge_tensor_desc.GetShape().GetDimNum(); if (dim_num == 0) { - GELOGI("node name:%s idx:%d out is scalar. stop back infer!", peer_out_data_node->GetName().c_str(), idx); + GELOGD("node name:%s idx:%d out is scalar. stop back infer!", peer_out_data_node->GetName().c_str(), idx); continue; } /// Check whether node to change dims () @@ -175,7 +176,7 @@ graphStatus FormatRefiner::BackInferProcess(std::deque &nodes, ge:: auto iter1 = kChangeDimNodes.find(peer_out_data_node_type); // 4 means dims num if ((iter1 != kChangeDimNodes.end()) && (dim_num < 4)) { - GELOGI("Node[%s] is change dim node and shape is smaller than 4. do not modify format", + GELOGD("Node[%s] is change dim node and shape is smaller than 4. do not modify format", (peer_out_data_node->GetName()).c_str()); continue; } @@ -235,7 +236,7 @@ graphStatus FormatRefiner::ForwardInferProcess(std::deque &nodes, g auto iter1 = kChangeDimNodes.find(peer_in_data_node_type); // 4 means dims num if ((iter1 != kChangeDimNodes.end()) && (dim_num < 4)) { - GELOGI("Node[%s] is change dim node. do not infer origin format", (peer_in_data_node->GetName()).c_str()); + GELOGD("Node[%s] is change dim node. do not infer origin format", (peer_in_data_node->GetName()).c_str()); continue; } ge_tensor_desc.SetOriginFormat(to_be_set_format); @@ -292,7 +293,7 @@ graphStatus FormatRefiner::DataNodeFormatProcess(std::vector &data_ return GRAPH_SUCCESS; } GELOGD("Enter DataNodeFormatProcess"); - std::vector uninferred_data_nodes; + std::vector uninfered_data_nodes; // Check and renew data nodes format for (const auto &data_node : data_nodes) { GE_CHECK_NOTNULL(data_node); @@ -301,10 +302,10 @@ graphStatus FormatRefiner::DataNodeFormatProcess(std::vector &data_ GE_CHECK_NOTNULL(op_desc->GetOutputDescPtr(0)); auto curr_format = op_desc->GetOutputDescPtr(0)->GetOriginFormat(); if (curr_format != FORMAT_ND) { - // Data format has been inferred , continue + // Data format has been infered , continue continue; } - // Set format for un-inferred data node + // Set format for un-infered data node auto input_descs = op_desc->GetAllInputsDescPtr(); auto output_descs = op_desc->GetAllOutputsDescPtr(); @@ -320,10 +321,10 @@ graphStatus FormatRefiner::DataNodeFormatProcess(std::vector &data_ output_desc->SetFormat(data_format); } } - uninferred_data_nodes.push_back(data_node); + uninfered_data_nodes.push_back(data_node); } // Reinfer format from uninfered data nodes - for (const auto &node : uninferred_data_nodes) { + for (const auto &node : uninfered_data_nodes) { if (node == nullptr) { continue; } @@ -341,7 +342,7 @@ graphStatus FormatRefiner::DataNodeFormatProcess(std::vector &data_ graphStatus FormatRefiner::InferOrigineFormat(const ge::ComputeGraphPtr &graph) { GELOGI("Enter InferOrigineFormat process!"); - // True: inferred false:no-inferred + // True: infered false:no-infered std::unordered_map node_status; std::vector anchor_points; std::vector data_nodes; @@ -373,7 +374,7 @@ graphStatus FormatRefiner::InferOrigineFormat(const ge::ComputeGraphPtr &graph) } } /// According to discuss with sys-enginer, data node default format is ND.Its format - /// should be set by inferred.But if some data-node can not be got by infer, set context's + /// should be set by infered.But if some data-node can not be 
got by infer, set context's /// format for these data nodes. /// Notice: ignore 5D formats auto data_format = graph->GetDataFormat(); diff --git a/src/common/graph/format_refiner.h b/src/common/graph/format_refiner.h index 3b732d2f..fa40a034 100644 --- a/src/common/graph/format_refiner.h +++ b/src/common/graph/format_refiner.h @@ -21,7 +21,6 @@ #include #include #include - #include "./compute_graph.h" #include "./external/graph/types.h" #include "./ge_error_codes.h" diff --git a/src/common/graph/ge_attr_define.cc b/src/common/graph/ge_attr_define.cc index d63b7761..67587632 100644 --- a/src/common/graph/ge_attr_define.cc +++ b/src/common/graph/ge_attr_define.cc @@ -1,4 +1,4 @@ -/** +/** * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/common/graph/ge_attr_value.cc b/src/common/graph/ge_attr_value.cc index fe9cbfec..8be42429 100644 --- a/src/common/graph/ge_attr_value.cc +++ b/src/common/graph/ge_attr_value.cc @@ -15,7 +15,6 @@ */ #include "graph/ge_attr_value.h" - #include "graph/ge_tensor.h" #include "external/graph/graph.h" #include "utils/attr_utils.h" @@ -35,7 +34,7 @@ namespace ge { GeAttrValue::NamedAttrs::NamedAttrs() { named_attrs_.InitDefault(); } GeAttrValue::NamedAttrs::NamedAttrs(const ProtoMsgOwner &owner, proto::NamedAttrs *proto_msg) - : named_attrs_(owner, proto_msg) {} + : named_attrs_(owner, proto_msg) {} // lint !e1744 void GeAttrValue::NamedAttrs::SetName(const std::string &name) { auto proto_msg = named_attrs_.GetProtoMsg(); @@ -155,29 +154,29 @@ class GeAttrValueImp { }; map GeAttrValueImp::attr_val_one_type_map_ = { - {proto::AttrDef::kI, GeAttrValue::VT_INT}, - {proto::AttrDef::kF, GeAttrValue::VT_FLOAT}, - {proto::AttrDef::kB, GeAttrValue::VT_BOOL}, - {proto::AttrDef::kS, GeAttrValue::VT_STRING}, - {proto::AttrDef::kT, GeAttrValue::VT_TENSOR}, - {proto::AttrDef::kTd, GeAttrValue::VT_TENSOR_DESC}, - {proto::AttrDef::kG, GeAttrValue::VT_GRAPH}, - {proto::AttrDef::kBt, GeAttrValue::VT_BYTES}, - {proto::AttrDef::kFunc, GeAttrValue::VT_NAMED_ATTRS}, - {proto::AttrDef::kListListInt, GeAttrValue::VT_LIST_LIST_INT}, - {proto::AttrDef::kDt, GeAttrValue::VT_DATA_TYPE}, + {proto::AttrDef::kI, GeAttrValue::VT_INT}, + {proto::AttrDef::kF, GeAttrValue::VT_FLOAT}, + {proto::AttrDef::kB, GeAttrValue::VT_BOOL}, + {proto::AttrDef::kS, GeAttrValue::VT_STRING}, + {proto::AttrDef::kT, GeAttrValue::VT_TENSOR}, + {proto::AttrDef::kTd, GeAttrValue::VT_TENSOR_DESC}, + {proto::AttrDef::kG, GeAttrValue::VT_GRAPH}, + {proto::AttrDef::kBt, GeAttrValue::VT_BYTES}, + {proto::AttrDef::kFunc, GeAttrValue::VT_NAMED_ATTRS}, + {proto::AttrDef::kListListInt, GeAttrValue::VT_LIST_LIST_INT}, + {proto::AttrDef::kDt, GeAttrValue::VT_DATA_TYPE}, }; map GeAttrValueImp::attr_val_list_type_map_ = { - {proto::AttrDef_ListValue_ListValueType_VT_LIST_INT, GeAttrValue::VT_LIST_INT}, - {proto::AttrDef_ListValue_ListValueType_VT_LIST_FLOAT, GeAttrValue::VT_LIST_FLOAT}, - {proto::AttrDef_ListValue_ListValueType_VT_LIST_BOOL, GeAttrValue::VT_LIST_BOOL}, - {proto::AttrDef_ListValue_ListValueType_VT_LIST_STRING, GeAttrValue::VT_LIST_STRING}, - {proto::AttrDef_ListValue_ListValueType_VT_LIST_TENSOR, GeAttrValue::VT_LIST_TENSOR}, - {proto::AttrDef_ListValue_ListValueType_VT_LIST_TENSOR_DESC, GeAttrValue::VT_LIST_TENSOR_DESC}, - {proto::AttrDef_ListValue_ListValueType_VT_LIST_GRAPH, GeAttrValue::VT_LIST_GRAPH}, - {proto::AttrDef_ListValue_ListValueType_VT_LIST_BYTES, GeAttrValue::VT_LIST_BYTES}, - 
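DataNodeFormatProcess above leaves ND data nodes untouched during inference and only afterwards stamps them with the graph-level data format. A standalone sketch of that defaulting step; the Format enum is a reduced stand-in for ge::Format:

#include <vector>

enum Format { FORMAT_ND, FORMAT_NCHW, FORMAT_NHWC };  // reduced stand-in

// Give every still-undetermined (ND) tensor the graph's default format,
// mirroring the handling of uninfered data nodes above.
void DefaultUndeterminedFormats(std::vector<Format> &tensor_formats, Format graph_data_format) {
  for (auto &fmt : tensor_formats) {
    if (fmt == FORMAT_ND) {
      fmt = graph_data_format;  // inference could not decide; use the context format
    }
  }
}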
{proto::AttrDef_ListValue_ListValueType_VT_LIST_NAMED_ATTRS, GeAttrValue::VT_LIST_NAMED_ATTRS}, - {proto::AttrDef_ListValue_ListValueType_VT_LIST_DATA_TYPE, GeAttrValue::VT_LIST_DATA_TYPE}, + {proto::AttrDef_ListValue_ListValueType_VT_LIST_INT, GeAttrValue::VT_LIST_INT}, + {proto::AttrDef_ListValue_ListValueType_VT_LIST_FLOAT, GeAttrValue::VT_LIST_FLOAT}, + {proto::AttrDef_ListValue_ListValueType_VT_LIST_BOOL, GeAttrValue::VT_LIST_BOOL}, + {proto::AttrDef_ListValue_ListValueType_VT_LIST_STRING, GeAttrValue::VT_LIST_STRING}, + {proto::AttrDef_ListValue_ListValueType_VT_LIST_TENSOR, GeAttrValue::VT_LIST_TENSOR}, + {proto::AttrDef_ListValue_ListValueType_VT_LIST_TENSOR_DESC, GeAttrValue::VT_LIST_TENSOR_DESC}, + {proto::AttrDef_ListValue_ListValueType_VT_LIST_GRAPH, GeAttrValue::VT_LIST_GRAPH}, + {proto::AttrDef_ListValue_ListValueType_VT_LIST_BYTES, GeAttrValue::VT_LIST_BYTES}, + {proto::AttrDef_ListValue_ListValueType_VT_LIST_NAMED_ATTRS, GeAttrValue::VT_LIST_NAMED_ATTRS}, + {proto::AttrDef_ListValue_ListValueType_VT_LIST_DATA_TYPE, GeAttrValue::VT_LIST_DATA_TYPE}, }; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeAttrValue::GeAttrValue() { value_.InitDefault(); } @@ -240,7 +239,7 @@ ATTR_VALUE_SET_GET_IMP(GeAttrValue::STR) ATTR_VALUE_SET_GET_IMP(vector) ATTR_VALUE_SET_GET_IMP(GeAttrValue::INT) ATTR_VALUE_SET_GET_IMP(vector) -ATTR_VALUE_SET_GET_IMP(GeAttrValue::FLOAT) +ATTR_VALUE_SET_GET_IMP(GeAttrValue::FLOAT) // lint !e524 ATTR_VALUE_SET_GET_IMP(vector) ATTR_VALUE_SET_GET_IMP(GeAttrValue::BOOL) ATTR_VALUE_SET_GET_IMP(vector) @@ -254,9 +253,11 @@ ATTR_VALUE_SET_GET_IMP(GeAttrValue::BYTES) ATTR_VALUE_SET_GET_IMP(vector) ATTR_VALUE_SET_GET_IMP(GeAttrValue::NAMED_ATTRS) ATTR_VALUE_SET_GET_IMP(vector) +/*lint -e665*/ ATTR_VALUE_SET_GET_IMP(vector>) -ATTR_VALUE_SET_GET_IMP(vector) -ATTR_VALUE_SET_GET_IMP(GeAttrValue::DATA_TYPE) +/*lint +e665*/ +ATTR_VALUE_SET_GET_IMP(vector) // lint !e665 +ATTR_VALUE_SET_GET_IMP(GeAttrValue::DATA_TYPE) // lint !e665 #undef ATTR_VALUE_SET_GET_IMP @@ -275,8 +276,8 @@ class AttrUtilsHelper { } inline static bool GetValueCheckListType( - const proto::AttrDef &attr_def, proto::AttrDef_ListValue_ListValueType proto_list_case, - const std::function item_check_fun) { + const proto::AttrDef &attr_def, proto::AttrDef_ListValue_ListValueType proto_list_case, + const std::function item_check_fun) { if (attr_def.value_case() != proto::AttrDef::kList) { GELOGW("Check ListType Failed, value_case %u", attr_def.value_case()); return false; @@ -636,9 +637,8 @@ bool GeAttrValueImp::SetValue(proto::AttrDef &proto_attr_val, const ge::DataType #define ATTR_VALUE_IMP_GET_LIST(ValType, proto_list_case, protoItem) \ bool GeAttrValueImp::GetValue(const proto::AttrDef &proto_attr_val, const ProtoMsgOwner &, vector &value) { \ value.clear(); \ - if (!AttrUtilsHelper::GetValueCheckListType(proto_attr_val, \ - proto::AttrDef_ListValue_ListValueType_##proto_list_case, \ - ListValueItemCheck(protoItem))) { \ + if (!AttrUtilsHelper::GetValueCheckListType( \ + proto_attr_val, proto::AttrDef_ListValue_ListValueType_##proto_list_case, ListValueItemCheck(protoItem))) { \ return false; \ } \ auto &list = proto_attr_val.list(); \ @@ -673,7 +673,7 @@ bool GeAttrValueImp::GetValue(const proto::AttrDef &proto_attr_val, const ProtoM bool GeAttrValueImp::GetValue(const proto::AttrDef &proto_attr_val, const ProtoMsgOwner &, vector &value) { if (!AttrUtilsHelper::GetValueCheckListType( - proto_attr_val, proto::AttrDef_ListValue_ListValueType_VT_LIST_TENSOR_DESC, ListValueItemCheck(td))) { + 
proto_attr_val, proto::AttrDef_ListValue_ListValueType_VT_LIST_TENSOR_DESC, ListValueItemCheck(td))) { return false; } auto &list = proto_attr_val.list(); @@ -693,8 +693,8 @@ bool GeAttrValueImp::GetValue(const proto::AttrDef &proto_attr_val, const ProtoM if (!AttrUtilsHelper::GetValueCheckType(proto_attr_val, proto::AttrDef::kT)) { return false; } - value = std::shared_ptr( - new (std::nothrow) GeTensor(proto_owner, const_cast(proto_attr_val).mutable_t())); + value = std::shared_ptr(new (std::nothrow) + GeTensor(proto_owner, const_cast(proto_attr_val).mutable_t())); GE_CHK_BOOL_RET_STATUS(value != nullptr, false, "value is nullptr"); return true; } @@ -757,7 +757,7 @@ bool GeAttrValueImp::GetValue(const proto::AttrDef &proto_attr_val, const ProtoM vector &value) { value.clear(); if (!AttrUtilsHelper::GetValueCheckListType( - proto_attr_val, proto::AttrDef_ListValue_ListValueType_VT_LIST_NAMED_ATTRS, ListValueItemCheck(na))) { + proto_attr_val, proto::AttrDef_ListValue_ListValueType_VT_LIST_NAMED_ATTRS, ListValueItemCheck(na))) { return false; } auto &list = proto_attr_val.list(); @@ -931,7 +931,7 @@ bool AttrUtils::HasAttr(ConstAttrHolderAdapter &&obj, const string &name) { #define ATTR_UTILS_SET_IMP(FuncName, Type) \ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool AttrUtils::Set##FuncName( \ - AttrHolderAdapter &&obj, const string &name, const Type &value) { \ + AttrHolderAdapter &&obj, const string &name, const Type &value) { \ proto::AttrDef *proto_attr_val = nullptr; \ if (!AttrUtilsHelper::MutableAttrMapItem(obj.get(), name, proto_attr_val) || proto_attr_val == nullptr) { \ return false; \ diff --git a/src/common/graph/ge_tensor.cc b/src/common/graph/ge_tensor.cc index 7c7e0433..d5def041 100644 --- a/src/common/graph/ge_tensor.cc +++ b/src/common/graph/ge_tensor.cc @@ -15,12 +15,10 @@ */ #include "graph/ge_tensor.h" - #include #include #include #include - #include "debug/ge_attr_define.h" #include "debug/ge_util.h" #include "framework/common/debug/ge_log.h" @@ -36,37 +34,37 @@ namespace ge { static const char *const kKeyDataTypeSelfDefined = "__tensor_desc_data_type__"; static const std::map kDataTypeMap = { - {DT_UNDEFINED, proto::DT_UNDEFINED}, - {DT_FLOAT, proto::DT_FLOAT}, - {DT_FLOAT16, proto::DT_FLOAT16}, - {DT_INT8, proto::DT_INT8}, - {DT_UINT8, proto::DT_UINT8}, - {DT_INT16, proto::DT_INT16}, - {DT_UINT16, proto::DT_UINT16}, - {DT_INT32, proto::DT_INT32}, - {DT_INT64, proto::DT_INT64}, - {DT_UINT32, proto::DT_UINT32}, - {DT_UINT64, proto::DT_UINT64}, - {DT_BOOL, proto::DT_BOOL}, - {DT_DOUBLE, proto::DT_DOUBLE}, - {DT_DUAL, proto::DT_DUAL}, - {DT_DUAL_SUB_INT8, proto::DT_DUAL_SUB_INT8}, - {DT_DUAL_SUB_UINT8, proto::DT_DUAL_SUB_UINT8}, - {DT_COMPLEX64, proto::DT_COMPLEX64}, - {DT_COMPLEX128, proto::DT_COMPLEX128}, - {DT_QINT8, proto::DT_QINT8}, - {DT_QINT16, proto::DT_QINT16}, - {DT_QINT32, proto::DT_QINT32}, - {DT_QUINT8, proto::DT_QUINT8}, - {DT_QUINT16, proto::DT_QUINT16}, - {DT_RESOURCE, proto::DT_RESOURCE}, - {DT_STRING_REF, proto::DT_STRING_REF}, - {DT_STRING, proto::DT_STRING}, + {DT_UNDEFINED, proto::DT_UNDEFINED}, + {DT_FLOAT, proto::DT_FLOAT}, + {DT_FLOAT16, proto::DT_FLOAT16}, + {DT_INT8, proto::DT_INT8}, + {DT_UINT8, proto::DT_UINT8}, + {DT_INT16, proto::DT_INT16}, + {DT_UINT16, proto::DT_UINT16}, + {DT_INT32, proto::DT_INT32}, + {DT_INT64, proto::DT_INT64}, + {DT_UINT32, proto::DT_UINT32}, + {DT_UINT64, proto::DT_UINT64}, + {DT_BOOL, proto::DT_BOOL}, + {DT_DOUBLE, proto::DT_DOUBLE}, + {DT_DUAL, proto::DT_DUAL}, + {DT_DUAL_SUB_INT8, proto::DT_DUAL_SUB_INT8}, + 
{DT_DUAL_SUB_UINT8, proto::DT_DUAL_SUB_UINT8}, + {DT_COMPLEX64, proto::DT_COMPLEX64}, + {DT_COMPLEX128, proto::DT_COMPLEX128}, + {DT_QINT8, proto::DT_QINT8}, + {DT_QINT16, proto::DT_QINT16}, + {DT_QINT32, proto::DT_QINT32}, + {DT_QUINT8, proto::DT_QUINT8}, + {DT_QUINT16, proto::DT_QUINT16}, + {DT_RESOURCE, proto::DT_RESOURCE}, + {DT_STRING_REF, proto::DT_STRING_REF}, + {DT_STRING, proto::DT_STRING}, }; static const std::map kDataTypeSelfDefinedMap = { - {DT_DUAL, 13}, {DT_DUAL_SUB_INT8, 14}, {DT_DUAL_SUB_UINT8, 15}, {DT_COMPLEX64, 16}, {DT_COMPLEX128, 17}, - {DT_QINT8, 18}, {DT_QINT16, 19}, {DT_QINT32, 20}, {DT_QUINT8, 21}, {DT_QUINT16, 22}, + {DT_DUAL, 13}, {DT_DUAL_SUB_INT8, 14}, {DT_DUAL_SUB_UINT8, 15}, {DT_COMPLEX64, 16}, {DT_COMPLEX128, 17}, + {DT_QINT8, 18}, {DT_QINT16, 19}, {DT_QINT32, 20}, {DT_QUINT8, 21}, {DT_QUINT16, 22}, }; GeShape::GeShape() { shape_def_.InitDefault(); } @@ -287,35 +285,32 @@ bool GeTensorDesc::GeTensorDescAttrsAreEqual(const GeTensorDesc &r_ge_tensor_des const auto &r_tensor_descriptor = r_ge_tensor_desc.tensor_descriptor_.GetProtoMsg(); if ((tensor_descriptor != nullptr) && (r_tensor_descriptor != nullptr)) { // Message TensorDescriptor in ge_ir.proto - return (IsEqual(tensor_descriptor->name(), r_tensor_descriptor->name(), "TensorDescriptor.name()") && - IsEqual(tensor_descriptor->dtype(), r_tensor_descriptor->dtype(), "TensorDescriptor.dtype()") && - // Message ShapeDef in ge_ir.proto - IsEqual(ToString(tensor_descriptor->shape().dim()), ToString(r_tensor_descriptor->shape().dim()), - "TensorDescriptor.shape().dim()") && - IsEqual(tensor_descriptor->layout(), r_tensor_descriptor->layout(), "TensorDescriptor.layout()") && - IsEqual(tensor_descriptor->has_out_attr(), r_tensor_descriptor->has_out_attr(), - "TensorDescriptor.has_out_attr()") && - IsEqual(tensor_descriptor->size(), r_tensor_descriptor->size(), "TensorDescriptor.size()") && - IsEqual(tensor_descriptor->weight_size(), r_tensor_descriptor->weight_size(), - "TensorDescriptor.weight_size()") && - IsEqual(tensor_descriptor->reuse_input(), r_tensor_descriptor->reuse_input(), - "TensorDescriptor.reuse_input()") && - IsEqual(tensor_descriptor->output_tensor(), r_tensor_descriptor->output_tensor(), - "TensorDescriptor.output_tensor()") && - IsEqual(tensor_descriptor->device_type(), r_tensor_descriptor->device_type(), - "TensorDescriptor.device_type()") && - IsEqual(tensor_descriptor->input_tensor(), r_tensor_descriptor->input_tensor(), - "TensorDescriptor.input_tensor()") && - IsEqual(tensor_descriptor->real_dim_cnt(), r_tensor_descriptor->real_dim_cnt(), - "TensorDescriptor.real_dim_cnt()") && - IsEqual(tensor_descriptor->reuse_input_index(), r_tensor_descriptor->reuse_input_index(), - "TensorDescriptor.reuse_input_index()") && - IsEqual(tensor_descriptor->data_offset(), r_tensor_descriptor->data_offset(), - "TensorDescriptor.data_offset()") && - IsEqual(tensor_descriptor->cmps_size(), r_tensor_descriptor->cmps_size(), "TensorDescriptor.cmps_size()") && - IsEqual(tensor_descriptor->cmps_tab(), r_tensor_descriptor->cmps_tab(), "TensorDescriptor.cmps_tab()") && - IsEqual(tensor_descriptor->cmps_tab_offset(), r_tensor_descriptor->cmps_tab_offset(), - "TensorDescriptor.cmps_tab_offset()")); + return ( + IsEqual(tensor_descriptor->name(), r_tensor_descriptor->name(), "TensorDescriptor.name()") && + IsEqual(tensor_descriptor->dtype(), r_tensor_descriptor->dtype(), "TensorDescriptor.dtype()") && + // Message ShapeDef in ge_ir.proto + IsEqual(ToString(tensor_descriptor->shape().dim()), 
ToString(r_tensor_descriptor->shape().dim()), + "TensorDescriptor.shape().dim()") && + IsEqual(tensor_descriptor->layout(), r_tensor_descriptor->layout(), "TensorDescriptor.layout()") && + IsEqual(tensor_descriptor->has_out_attr(), r_tensor_descriptor->has_out_attr(), + "TensorDescriptor.has_out_attr()") && + IsEqual(tensor_descriptor->size(), r_tensor_descriptor->size(), "TensorDescriptor.size()") && + IsEqual(tensor_descriptor->weight_size(), r_tensor_descriptor->weight_size(), "TensorDescriptor.weight_size()") && + IsEqual(tensor_descriptor->reuse_input(), r_tensor_descriptor->reuse_input(), "TensorDescriptor.reuse_input()") && + IsEqual(tensor_descriptor->output_tensor(), r_tensor_descriptor->output_tensor(), + "TensorDescriptor.output_tensor()") && + IsEqual(tensor_descriptor->device_type(), r_tensor_descriptor->device_type(), "TensorDescriptor.device_type()") && + IsEqual(tensor_descriptor->input_tensor(), r_tensor_descriptor->input_tensor(), + "TensorDescriptor.input_tensor()") && + IsEqual(tensor_descriptor->real_dim_cnt(), r_tensor_descriptor->real_dim_cnt(), + "TensorDescriptor.real_dim_cnt()") && + IsEqual(tensor_descriptor->reuse_input_index(), r_tensor_descriptor->reuse_input_index(), + "TensorDescriptor.reuse_input_index()") && + IsEqual(tensor_descriptor->data_offset(), r_tensor_descriptor->data_offset(), "TensorDescriptor.data_offset()") && + IsEqual(tensor_descriptor->cmps_size(), r_tensor_descriptor->cmps_size(), "TensorDescriptor.cmps_size()") && + IsEqual(tensor_descriptor->cmps_tab(), r_tensor_descriptor->cmps_tab(), "TensorDescriptor.cmps_tab()") && + IsEqual(tensor_descriptor->cmps_tab_offset(), r_tensor_descriptor->cmps_tab_offset(), + "TensorDescriptor.cmps_tab_offset()")); } else { return ((tensor_descriptor == nullptr) && (r_tensor_descriptor == nullptr)); } @@ -575,9 +570,7 @@ GeTensorDesc &GeTensor::DescReference() const { return __desc_; } -void GeTensor::SetTensorDesc(const GeTensorDesc &tensor_desc) { - DescReference() = tensor_desc; -} +void GeTensor::SetTensorDesc(const GeTensorDesc &tensor_desc) { DescReference() = tensor_desc; } const Buffer GeTensor::GetData() const { auto proto_msg = tensor_def_.GetProtoMsg(); @@ -741,10 +734,12 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY void TensorUtils::SetOutputTensor } static map device_to_str_map{ - {0, "NPU"}, {1, "CPU"}, + {0, "NPU"}, + {1, "CPU"}, }; static map str_to_device_map{ - {"NPU", 0}, {"CPU", 1}, + {"NPU", 0}, + {"CPU", 1}, }; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus TensorUtils::GetDeviceType(const GeTensorDesc &tensor_desc, @@ -901,7 +896,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY void TensorUtils::SetCmpsInfo(GeT } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool TensorUtils::HasAlloffsetQuantizeInfo( - const GeTensorDesc &tensor_desc) { + const GeTensorDesc &tensor_desc) { return tensor_desc.HasAttr(TENSOR_UTILS_ALLOFFSET_QUANTIZE_INFO); } diff --git a/src/common/graph/graph.cc b/src/common/graph/graph.cc index 5462e8c5..4d7c2a3b 100644 --- a/src/common/graph/graph.cc +++ b/src/common/graph/graph.cc @@ -15,7 +15,6 @@ */ #include "external/graph/graph.h" - #include "debug/ge_util.h" #include "external/graph/operator.h" #include "framework/common/debug/ge_log.h" diff --git a/src/common/graph/inference_context.cc b/src/common/graph/inference_context.cc index 9e2d96ab..ed8193dc 100644 --- a/src/common/graph/inference_context.cc +++ b/src/common/graph/inference_context.cc @@ -15,39 +15,98 @@ */ #include "external/graph/inference_context.h" +#include "debug/ge_util.h" 
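The inference_context.cc rewrite that follows moves ShapeAndType and InferenceContext onto the PImpl (pointer-to-implementation) idiom, so the public header no longer exposes member layout. As a rough, self-contained illustration of the same idiom — the Widget/WidgetImpl names are hypothetical and not part of this patch — consider:

    // pimpl_sketch.cc -- minimal PImpl sketch (hypothetical example, not GraphEngine code)
    #include <memory>
    #include <string>

    class WidgetImpl {                     // hidden implementation; lives only in the .cc file
     public:
      std::string name_;
      int value_ = 0;
    };

    class Widget {                         // public class exposes no data members
     public:
      Widget() : impl_(std::make_shared<WidgetImpl>()) {}
      void SetValue(int v) {
        if (impl_ != nullptr) {            // same null-guard style the hunk below uses
          impl_->value_ = v;
        }
      }
      int GetValue() const { return (impl_ != nullptr) ? impl_->value_ : 0; }

     private:
      std::shared_ptr<WidgetImpl> impl_;   // opaque handle; layout changes stay in the .cc
    };

Because callers hold only the opaque handle, WidgetImpl can grow new fields without recompiling users of the header — the same motivation behind the ShapeAndTypeImpl and InferenceContextImpl classes introduced below.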
namespace ge { -ShapeAndType::ShapeAndType(const Shape &shape, DataType data_type) : shape_(shape), data_type_(data_type) {} +class ShapeAndTypeImpl { + public: + ShapeAndTypeImpl() = default; + ~ShapeAndTypeImpl() = default; -void ShapeAndType::SetShape(const Shape &shape) { shape_ = shape; } + ShapeAndTypeImpl(const Shape &shape, DataType data_type) : shape_(shape), data_type_(data_type) {} -void ShapeAndType::SetType(DataType data_type) { data_type_ = data_type; } + Shape shape_; + DataType data_type_ = DT_UNDEFINED; +}; -const Shape &ShapeAndType::GetShape() const { return shape_; } +class InferenceContextImpl { + public: + InferenceContextImpl() = default; + ~InferenceContextImpl() = default; -DataType ShapeAndType::GetDataType() const { return data_type_; } + // For deliver to op in pair, help to support dynamic shape + std::vector marks_; + std::vector> input_handle_shapes_and_types_; + std::vector> output_handle_shapes_and_types_; +}; + +ShapeAndType::ShapeAndType() { shape_and_type_impl_ = ComGraphMakeShared(); } + +ShapeAndType::ShapeAndType(const Shape &shape, DataType data_type) { + shape_and_type_impl_ = ComGraphMakeShared(shape, data_type); +} + +void ShapeAndType::SetShape(const Shape &shape) { + if (shape_and_type_impl_ != nullptr) { + shape_and_type_impl_->shape_ = shape; + } +} + +void ShapeAndType::SetType(DataType data_type) { + if (shape_and_type_impl_ != nullptr) { + shape_and_type_impl_->data_type_ = data_type; + } +} + +Shape ShapeAndType::GetShape() const { + if (shape_and_type_impl_ != nullptr) { + return shape_and_type_impl_->shape_; + } + return Shape(); +} + +DataType ShapeAndType::GetDataType() const { + if (shape_and_type_impl_ != nullptr) { + return shape_and_type_impl_->data_type_; + } + return DT_UNDEFINED; +} + +InferenceContext::InferenceContext(std::unique_ptr &impl) { + inference_context_impl_ = std::move(impl); +} + +std::unique_ptr InferenceContext::Create() { + std::unique_ptr impl = + std::unique_ptr(new (std::nothrow) InferenceContextImpl()); + if (impl == nullptr) { + return nullptr; + } + + return std::unique_ptr(new (std::nothrow) InferenceContext(impl)); +} void InferenceContext::SetInputHandleShapesAndTypes(std::vector> &&shapes_and_types) { - input_handle_shapes_and_types_.swap(shapes_and_types); + inference_context_impl_->input_handle_shapes_and_types_.swap(shapes_and_types); } const std::vector> &InferenceContext::GetInputHandleShapesAndTypes() const { - return input_handle_shapes_and_types_; + return inference_context_impl_->input_handle_shapes_and_types_; } const std::vector> &InferenceContext::GetOutputHandleShapesAndTypes() const { - return output_handle_shapes_and_types_; + return inference_context_impl_->output_handle_shapes_and_types_; } void InferenceContext::SetOutputHandleShapesAndTypes(const std::vector> &shapes_and_types) { - output_handle_shapes_and_types_ = shapes_and_types; + inference_context_impl_->output_handle_shapes_and_types_ = shapes_and_types; } void InferenceContext::SetOutputHandleShapesAndTypes(std::vector> &&shapes_and_types) { - output_handle_shapes_and_types_.swap(shapes_and_types); + inference_context_impl_->output_handle_shapes_and_types_.swap(shapes_and_types); } -void InferenceContext::SetMarks(const std::vector &marks) { marks_ = marks; } +void InferenceContext::SetMarks(const std::vector &marks) { inference_context_impl_->marks_ = marks; } -const std::vector &InferenceContext::GetMarks() const { return marks_; } +const std::vector &InferenceContext::GetMarks() const { return 
inference_context_impl_->marks_; } } // namespace ge diff --git a/src/common/graph/model.cc b/src/common/graph/model.cc index fef2af38..fac09670 100644 --- a/src/common/graph/model.cc +++ b/src/common/graph/model.cc @@ -15,7 +15,6 @@ */ #include "graph/model.h" - #include #include #include @@ -28,7 +27,6 @@ #include #include #include - #include "debug/ge_attr_define.h" #include "debug/ge_util.h" #include "framework/common/debug/ge_log.h" diff --git a/src/common/graph/model_serialize.cc b/src/common/graph/model_serialize.cc index 52397530..ebb61589 100644 --- a/src/common/graph/model_serialize.cc +++ b/src/common/graph/model_serialize.cc @@ -191,7 +191,7 @@ bool ModelSerializeImp::SerializeModel(const Model &model, proto::ModelDef *mode } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ModelSerializeImp::UnserializeTensor( - GeTensorPtr &tensor, proto::TensorDef &tensor_proto) { + GeTensorPtr &tensor, proto::TensorDef &tensor_proto) { tensor = std::shared_ptr(new (std::nothrow) GeTensor(protobuf_owner_, &tensor_proto)); if (tensor == nullptr) { GELOGE(GRAPH_FAILED, "tensor is nullptr"); @@ -208,14 +208,14 @@ bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_d // Input tensor for (auto &input_desc : *op_def_proto.mutable_input_desc()) { std::shared_ptr temp_value = - std::shared_ptr(new (std::nothrow) GeTensorDesc(protobuf_owner_, &input_desc)); + std::shared_ptr(new (std::nothrow) GeTensorDesc(protobuf_owner_, &input_desc)); GE_CHK_BOOL_RET_STATUS(temp_value != nullptr, false, "temp_value is nullptr"); op_desc->inputs_desc_.push_back(temp_value); } // Output tensor for (auto &output_desc : *op_def_proto.mutable_output_desc()) { std::shared_ptr temp_value = - std::shared_ptr(new (std::nothrow) GeTensorDesc(protobuf_owner_, &output_desc)); + std::shared_ptr(new (std::nothrow) GeTensorDesc(protobuf_owner_, &output_desc)); GE_CHK_BOOL_RET_STATUS(temp_value != nullptr, false, "temp_value is nullptr"); op_desc->outputs_desc_.push_back(temp_value); } @@ -265,13 +265,13 @@ bool ModelSerializeImp::HandleNodeNameRef() { item.dst_node_name.c_str(), item.dst_in_index); return false; } - GE_CHK_BOOL_ONLY_LOG((src_anchor->LinkTo(dst_anchor) == GRAPH_SUCCESS), " linkTo failed."); + GE_CHK_BOOL_ONLY_LOG((src_anchor->LinkTo(dst_anchor) == GRAPH_SUCCESS), " linkTo failed."); // lint !e737 } else { // Control edge auto src_anchor = src_node_it->second->GetOutControlAnchor(); auto dst_anchor = item.dst_node->GetInControlAnchor(); if (src_anchor != nullptr && dst_anchor != nullptr) { - GE_CHK_BOOL_ONLY_LOG((src_anchor->LinkTo(dst_anchor) == GRAPH_SUCCESS), " linkTo failed."); + GE_CHK_BOOL_ONLY_LOG((src_anchor->LinkTo(dst_anchor) == GRAPH_SUCCESS), " linkTo failed."); // lint !e737 } } } diff --git a/src/common/graph/node.cc b/src/common/graph/node.cc index 01866be4..42558ddf 100644 --- a/src/common/graph/node.cc +++ b/src/common/graph/node.cc @@ -15,9 +15,7 @@ */ #include "graph/node.h" - #include - #include "debug/ge_op_types.h" #include "debug/ge_util.h" #include "external/graph/operator_factory.h" @@ -533,7 +531,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Node::Vistor Node::GetIn } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool Node::IsAllInNodesSeen( - std::unordered_set &nodes_seen) const { + std::unordered_set &nodes_seen) const { for (const auto &in_anchor : in_data_anchors_) { GE_CHK_BOOL_EXEC((in_anchor != nullptr), continue, "in_data_anchor is nullptr"); auto out_anchor = in_anchor->GetPeerOutAnchor(); @@ -736,10 +734,10 @@ graphStatus Node::Verify() 
const { continue; } GE_CHK_BOOL_RET_STATUS( - op_->GetType() == data_type || op_->GetType() == aipp_data_type || op_->GetType() == const_type || - op_->GetType() == variable_type || op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || - in_anchor_ptr->GetPeerAnchors().size() > 0, - GRAPH_FAILED, "operator %s's input %d is not linked.", GetName().c_str(), in_anchor_ptr->GetIdx()); + op_->GetType() == data_type || op_->GetType() == aipp_data_type || op_->GetType() == const_type || + op_->GetType() == variable_type || op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || + in_anchor_ptr->GetPeerAnchors().size() > 0, + GRAPH_FAILED, "operator %s's input %d is not linked.", GetName().c_str(), in_anchor_ptr->GetIdx()); } string frameworkop_type = "FrameworkOp"; diff --git a/src/common/graph/op_desc.cc b/src/common/graph/op_desc.cc index 30bbb8fc..22120a37 100644 --- a/src/common/graph/op_desc.cc +++ b/src/common/graph/op_desc.cc @@ -15,7 +15,6 @@ */ #include "graph/op_desc.h" - #include "debug/ge_attr_define.h" #include "debug/ge_util.h" #include "external/graph/operator.h" @@ -33,6 +32,7 @@ using std::shared_ptr; using std::string; using std::vector; +/*lint -save -e521 -e681 -e732 -e737*/ namespace ge { const std::string ATTR_NAME_ID = "id"; @@ -302,29 +302,28 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDesc::OpDescAttrsAreEqual( if ((op_def != nullptr) && (r_op_def != nullptr)) { // Message OpDef in ge_ir.proto return ( - IsEqual(op_def->name(), r_op_def->name(), "OpDef_.name()") && - IsEqual(op_def->type(), r_op_def->type(), "OpDef_.type()") && - IsEqual(ToString(op_def->input()), ToString(r_op_def->input()), "OpDef_.input()") && - IsEqual(op_def->has_out_attr(), r_op_def->has_out_attr(), "OpDef_.has_out_attr()") && - IsEqual(op_def->stream_id(), r_op_def->stream_id(), "OpDef_.stream_id()") && - IsEqual(ToString(op_def->input_name()), ToString(r_op_def->input_name()), "OpDef_.input_name()") && - IsEqual(ToString(op_def->src_name()), ToString(r_op_def->src_name()), "OpDef_.src_name()") && - IsEqual(ToString(op_def->dst_name()), ToString(r_op_def->dst_name()), "OpDef_.dst_name()") && - IsEqual(ToString(op_def->src_index()), ToString(r_op_def->src_index()), "OpDef_.src_index()") && - IsEqual(ToString(op_def->dst_index()), ToString(r_op_def->dst_index()), "OpDef_.dst_index()") && - IsEqual(ToString(op_def->input_i()), ToString(r_op_def->input_i()), "OpDef_.input_i()") && - IsEqual(ToString(op_def->output_i()), ToString(r_op_def->output_i()), "OpDef_.output_i()") && - IsEqual(ToString(op_def->workspace()), ToString(r_op_def->workspace()), "OpDef_.workspace()") && - IsEqual(ToString(op_def->workspace_bytes()), ToString(r_op_def->workspace_bytes()), - "OpDef_.workspace_bytes()") && - IsEqual(ToString(op_def->is_input_const()), ToString(r_op_def->is_input_const()), "OpDef_.is_input_const()")); + IsEqual(op_def->name(), r_op_def->name(), "OpDef_.name()") && + IsEqual(op_def->type(), r_op_def->type(), "OpDef_.type()") && + IsEqual(ToString(op_def->input()), ToString(r_op_def->input()), "OpDef_.input()") && + IsEqual(op_def->has_out_attr(), r_op_def->has_out_attr(), "OpDef_.has_out_attr()") && + IsEqual(op_def->stream_id(), r_op_def->stream_id(), "OpDef_.stream_id()") && + IsEqual(ToString(op_def->input_name()), ToString(r_op_def->input_name()), "OpDef_.input_name()") && + IsEqual(ToString(op_def->src_name()), ToString(r_op_def->src_name()), "OpDef_.src_name()") && + IsEqual(ToString(op_def->dst_name()), ToString(r_op_def->dst_name()), "OpDef_.dst_name()") && + 
IsEqual(ToString(op_def->src_index()), ToString(r_op_def->src_index()), "OpDef_.src_index()") && + IsEqual(ToString(op_def->dst_index()), ToString(r_op_def->dst_index()), "OpDef_.dst_index()") && + IsEqual(ToString(op_def->input_i()), ToString(r_op_def->input_i()), "OpDef_.input_i()") && + IsEqual(ToString(op_def->output_i()), ToString(r_op_def->output_i()), "OpDef_.output_i()") && + IsEqual(ToString(op_def->workspace()), ToString(r_op_def->workspace()), "OpDef_.workspace()") && + IsEqual(ToString(op_def->workspace_bytes()), ToString(r_op_def->workspace_bytes()), "OpDef_.workspace_bytes()") && + IsEqual(ToString(op_def->is_input_const()), ToString(r_op_def->is_input_const()), "OpDef_.is_input_const()")); } else { return ((op_def == nullptr) && (r_op_def == nullptr)); } } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDesc::OpDescGenTensorDescsAreEqual( - const OpDesc &r_op_desc) const { + const OpDesc &r_op_desc) const { // 1.Verify inputs and outputs desc size const auto inputs_desc_size = this->inputs_desc_.size(); const auto r_inputs_desc_size = r_op_desc.inputs_desc_.size(); diff --git a/src/common/graph/op_imp.cc b/src/common/graph/op_imp.cc index 5b21c15f..9abf242b 100644 --- a/src/common/graph/op_imp.cc +++ b/src/common/graph/op_imp.cc @@ -20,14 +20,16 @@ #include "debug/ge_log.h" #include "debug/ge_util.h" +using namespace std; + namespace ge { + GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus -BroadCastInfer(const std::function()>& get_in1_shape, - const std::function()>& get_in2_shape, - const std::function& outShape)>& set_out_shape) { +BroadCastInfer(const function()>& get_in1_shape, const function()>& get_in2_shape, + const function& outShape)>& set_out_shape) { auto x1_shape = get_in1_shape(); auto x2_shape = get_in2_shape(); - std::vector y_shape; + vector y_shape; if (x1_shape.empty()) { y_shape = x2_shape; @@ -48,7 +50,7 @@ BroadCastInfer(const std::function()>& get_in1_shape, int x2_shape_size = static_cast(x2_shape.size()); for (int i = 0; i < x2_shape_size; i++) { bool shapeFlag = - ((x1_shape[i + len_diff] != x2_shape[i]) && (std::min(x1_shape[i + len_diff], x2_shape[i]) != 1)); + ((x1_shape[i + len_diff] != x2_shape[i]) && (std::min(x1_shape[i + len_diff], x2_shape[i]) != 1)); if (shapeFlag) { GE_LOGE("operands could not be broadcast together"); return GRAPH_FAILED; @@ -62,7 +64,7 @@ BroadCastInfer(const std::function()>& get_in1_shape, int x1_shape_size = static_cast(x1_shape.size()); for (int i = 0; i < x1_shape_size; i++) { bool shapeFlag = - ((x1_shape[i] != x2_shape[i - len_diff]) && (std::min(x1_shape[i], x2_shape[i - len_diff]) != 1)); + ((x1_shape[i] != x2_shape[i - len_diff]) && (std::min(x1_shape[i], x2_shape[i - len_diff]) != 1)); if (shapeFlag) { GE_LOGE("operands could not be broadcast together"); return GRAPH_FAILED; @@ -73,4 +75,5 @@ BroadCastInfer(const std::function()>& get_in1_shape, set_out_shape(y_shape); return GRAPH_SUCCESS; } + } // namespace ge diff --git a/src/common/graph/operator.cc b/src/common/graph/operator.cc index 0b98bf1e..1d8db14e 100644 --- a/src/common/graph/operator.cc +++ b/src/common/graph/operator.cc @@ -22,10 +22,12 @@ #include #include +//#include "./array_ops.h" #include "debug/ge_log.h" #include "debug/ge_op_types.h" #include "debug/ge_util.h" #include "external/graph/attr_value.h" +#include "external/graph/types.h" #include "framework/common/debug/ge_log.h" #include "graph/compute_graph.h" #include "graph/ge_attr_value.h" @@ -33,6 +35,7 @@ #include "graph/node.h" #include "graph/op_desc.h" #include 
"graph/operator_factory.h" +#include "graph/usr_types.h" #include "utils/graph_utils.h" #include "utils/op_desc_utils.h" #include "utils/tensor_adapter.h" @@ -74,6 +77,29 @@ class OpIO { int index_; std::shared_ptr owner_; }; + +class TensorTypeImpl { + public: + TensorTypeImpl() = default; + ~TensorTypeImpl() = default; + + std::vector dt_vec_; +}; + +TensorType::TensorType(DataType dt) { + tensor_type_impl_ = ComGraphMakeShared(); + if (tensor_type_impl_ != nullptr) { + tensor_type_impl_->dt_vec_.push_back(dt); + } +} + +TensorType::TensorType(const std::initializer_list &types) { + tensor_type_impl_ = ComGraphMakeShared(); + if (tensor_type_impl_ != nullptr) { + tensor_type_impl_->dt_vec_ = types; + } +} + class OperatorImpl : public std::enable_shared_from_this { friend class GraphBuilderImpl; friend class OpDescUtils; @@ -128,8 +154,15 @@ class OperatorImpl : public std::enable_shared_from_this { OpIO op_dst(dst_name, dst_index, shared_from_this()); src_op_impl->UpdateLinkMapImpl(src_name, op_dst); + auto output_desc = src_op_impl->GetOutputDesc(src_name); + auto input_desc = op_desc_->GetInputDesc(dst_name); + if (input_desc.GetFormat() == FORMAT_RESERVED) { + output_desc.SetFormat(FORMAT_ND); + } else { + output_desc.SetFormat(input_desc.GetFormat()); + } // Fix for linking opdesc - if (op_desc_->UpdateInputDesc(dst_name, src_op_impl->GetOutputDesc(src_name)) != GRAPH_SUCCESS) { + if (op_desc_->UpdateInputDesc(dst_name, output_desc) != GRAPH_SUCCESS) { GELOGE(GRAPH_FAILED, "Update inputdesc failed,dst name is %s, src name is %s", dst_name.c_str(), src_name.c_str()); return; @@ -146,10 +179,11 @@ class OperatorImpl : public std::enable_shared_from_this { int dst_index = op_desc_->GetInputIndexByName(dst_name); GE_CHK_BOOL_EXEC(dst_index >= 0, return, "Find input index by name failed. name[%s], op name:%s", dst_name.c_str(), op_desc_->GetName().c_str()); - GE_CHK_BOOL_EXEC(out_handler->GetOwner() != nullptr && out_handler->GetOwner()->GetOpDescImpl() != nullptr, return, - "out_handler invalid. name[%s]", dst_name.c_str()); + auto out_op_impl = out_handler->GetOwner(); + GE_CHK_BOOL_EXEC(out_op_impl && out_op_impl->GetOpDescImpl(), return, "out_handler invalid. 
name[%s]", + dst_name.c_str()); bool is_const = false; - if (out_handler->GetOwner()->GetOpDescImpl()->GetType() == CONSTANT) { + if (out_op_impl->GetOpDescImpl()->GetType() == CONSTANT) { is_const = true; } auto is_input_const = op_desc_->GetIsInputConst(); @@ -160,14 +194,19 @@ class OperatorImpl : public std::enable_shared_from_this { op_desc_->SetIsInputConst(is_input_const); OpIO in_handler(dst_name, dst_index, shared_from_this()); - auto out_op_impl = out_handler->GetOwner(); - GE_CHK_BOOL_EXEC(out_op_impl != nullptr, return, "Get out_handler's impl failed."); + GE_CHK_BOOL_EXEC(!!out_op_impl, return, "Get out_handler's impl failed."); out_op_impl->UpdateLinkMapImpl(src_name, in_handler); - GE_CHK_BOOL_EXEC( - op_desc_->UpdateInputDesc(dst_name, out_handler->GetOwner()->GetOutputDesc(src_name)) == GRAPH_SUCCESS, return, - "Update input desc failed,dst name is %s,src name is %s", dst_name.c_str(), - src_name.c_str()); // fix for linking opdesc + auto src_output_desc = out_op_impl->GetOutputDesc(src_name); + auto dst_input_desc = op_desc_->GetInputDesc(dst_name); + if (dst_input_desc.GetFormat() == FORMAT_RESERVED) { + src_output_desc.SetFormat(FORMAT_ND); + } else { + src_output_desc.SetFormat(dst_input_desc.GetFormat()); + } + GE_CHK_BOOL_EXEC(op_desc_->UpdateInputDesc(dst_name, src_output_desc) == GRAPH_SUCCESS, return, + "Update input desc failed,dst name is %s,src name is %s", dst_name.c_str(), + src_name.c_str()); // fix for linking opdesc } void AddControlInputImp(const ge::Operator &src_oprt) { @@ -382,7 +421,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Operator OpDescUtils::CreateOpera return Operator("default"); } OperatorKeeper::GetInstance().CheckInOperator(operator_impl_ptr); - return operator_impl_ptr->ToOperator(); + return operator_impl_ptr->ToOperator(); // lint !e514 } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDescPtr OpDescUtils::GetOpDescFromOperator(const Operator &oprt) { @@ -617,26 +656,26 @@ GE_FUNC_HOST_VISIBILITY size_t Operator::GetOutputsSize() const { // According to op get the attrs name and type namespace { const std::map kAttrTypesMap = { - {GeAttrValue::VT_NONE, "VT_STRING"}, - {GeAttrValue::VT_STRING, "VT_STRING"}, - {GeAttrValue::VT_FLOAT, "VT_FLOAT"}, - {GeAttrValue::VT_BOOL, "VT_BOOL"}, - {GeAttrValue::VT_INT, "VT_INT"}, - {GeAttrValue::VT_TENSOR_DESC, "VT_TENSOR_DESC"}, - {GeAttrValue::VT_TENSOR, "VT_TENSOR"}, - {GeAttrValue::VT_BYTES, "VT_BYTES"}, - {GeAttrValue::VT_GRAPH, "VT_GRAPH"}, - {GeAttrValue::VT_NAMED_ATTRS, "VT_NAMED_ATTRS"}, - {GeAttrValue::VT_LIST_BASE, "VT_LIST_BASE"}, - {GeAttrValue::VT_LIST_STRING, "VT_LIST_STRING"}, - {GeAttrValue::VT_LIST_FLOAT, "VT_LIST_FLOAT"}, - {GeAttrValue::VT_LIST_BOOL, "VT_LIST_BOOL"}, - {GeAttrValue::VT_LIST_INT, "VT_LIST_INT"}, - {GeAttrValue::VT_LIST_TENSOR_DESC, "VT_LIST_TENSOR_DESC"}, - {GeAttrValue::VT_LIST_TENSOR, "VT_LIST_TENSOR"}, - {GeAttrValue::VT_LIST_BYTES, "VT_LIST_BYTES"}, - {GeAttrValue::VT_GRAPH, "VT_GRAPH"}, - {GeAttrValue::VT_LIST_NAMED_ATTRS, "VT_LIST_NAMED_ATTRS"}, + {GeAttrValue::VT_NONE, "VT_STRING"}, + {GeAttrValue::VT_STRING, "VT_STRING"}, + {GeAttrValue::VT_FLOAT, "VT_FLOAT"}, + {GeAttrValue::VT_BOOL, "VT_BOOL"}, + {GeAttrValue::VT_INT, "VT_INT"}, + {GeAttrValue::VT_TENSOR_DESC, "VT_TENSOR_DESC"}, + {GeAttrValue::VT_TENSOR, "VT_TENSOR"}, + {GeAttrValue::VT_BYTES, "VT_BYTES"}, + {GeAttrValue::VT_GRAPH, "VT_GRAPH"}, + {GeAttrValue::VT_NAMED_ATTRS, "VT_NAMED_ATTRS"}, + {GeAttrValue::VT_LIST_BASE, "VT_LIST_BASE"}, + {GeAttrValue::VT_LIST_STRING, "VT_LIST_STRING"}, + 
{GeAttrValue::VT_LIST_FLOAT, "VT_LIST_FLOAT"}, + {GeAttrValue::VT_LIST_BOOL, "VT_LIST_BOOL"}, + {GeAttrValue::VT_LIST_INT, "VT_LIST_INT"}, + {GeAttrValue::VT_LIST_TENSOR_DESC, "VT_LIST_TENSOR_DESC"}, + {GeAttrValue::VT_LIST_TENSOR, "VT_LIST_TENSOR"}, + {GeAttrValue::VT_LIST_BYTES, "VT_LIST_BYTES"}, + {GeAttrValue::VT_GRAPH, "VT_GRAPH"}, + {GeAttrValue::VT_LIST_NAMED_ATTRS, "VT_LIST_NAMED_ATTRS"}, }; } // namespace const std::map Operator::GetAllAttrNamesAndTypes() const { @@ -665,32 +704,32 @@ const std::map Operator::GetAllAttrNamesAndTypes() con void Operator::InputRegister(const string &name) { GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return, "operator impl is nullptr."); GE_CHK_BOOL_EXEC(operator_impl_->GetOpDescImpl() != nullptr, return, "GetOpDescImpl is nullptr."); - (void)operator_impl_->GetOpDescImpl()->AddInputDesc(name, GeTensorDesc()); + operator_impl_->GetOpDescImpl()->AddInputDesc(name, GeTensorDesc()); } void Operator::OptionalInputRegister(const string &name) { GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return, "operator impl is nullptr."); GE_CHK_BOOL_EXEC(operator_impl_->GetOpDescImpl() != nullptr, return, "GetOpDescImpl is nullptr."); (void)operator_impl_->GetOpDescImpl()->AddOptionalInputDesc(name, - GeTensorDesc(GeShape(), FORMAT_RESERVED, DT_UNDEFINED)); + GeTensorDesc(GeShape(), FORMAT_RESERVED, DT_UNDEFINED)); } void Operator::InferFuncRegister(const std::function &func) { GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return, "operator impl is nullptr."); GE_CHK_BOOL_EXEC(operator_impl_->GetOpDescImpl() != nullptr, return, "GetOpDescImpl is nullptr."); - operator_impl_->GetOpDescImpl()->AddInferFunc(func); + (void)operator_impl_->GetOpDescImpl()->AddInferFunc(func); } void Operator::InferFormatFuncRegister(const std::function &func) { GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return, "operator impl is nullptr."); GE_CHK_BOOL_EXEC(operator_impl_->GetOpDescImpl() != nullptr, return, "GetOpDescImpl is nullptr."); - operator_impl_->GetOpDescImpl()->AddInferFormatFunc(func); + (void)operator_impl_->GetOpDescImpl()->AddInferFormatFunc(func); } void Operator::VerifierFuncRegister(const std::function &func) { GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return, "operator impl is nullptr."); GE_CHK_BOOL_EXEC(operator_impl_->GetOpDescImpl() != nullptr, return, "GetOpDescImpl is nullptr."); - operator_impl_->GetOpDescImpl()->AddVerifierFunc(func); + (void)operator_impl_->GetOpDescImpl()->AddVerifierFunc(func); } void Operator::OutputRegister(const string &name) { @@ -734,7 +773,7 @@ int Operator::GetDynamicOutputNum(const string &name) const { void Operator::RequiredAttrRegister(const string &name) { GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return, "operator impl is nullptr."); GE_CHK_BOOL_EXEC(operator_impl_->GetOpDescImpl() != nullptr, return, "GetOpDescImpl is nullptr."); - (void)operator_impl_->GetOpDescImpl()->AddRequiredAttr(name); + operator_impl_->GetOpDescImpl()->AddRequiredAttr(name); } graphStatus Operator::VerifyAll() { @@ -960,26 +999,6 @@ graphStatus Operator::GetAttr(const string &name, OpBytes &attr_value) const { return GRAPH_SUCCESS; } -Operator &Operator::SetAttr(const string &name, const UsrQuantizeFactorParams &attr_value) { - GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return *this, "operator impl is nullptr, name %s.", name.c_str()); - QuantizeFactorParams def_quant; - GE_CHK_BOOL_EXEC(TypeUtils::Usr2DefQuantizeFactorParams(attr_value, def_quant) == GRAPH_SUCCESS, return *this, - "trans para fail"); - 
GE_CHK_BOOL_EXEC(OpDescUtils::SetQuantizeFactorParams(operator_impl_->GetOpDescImpl(), def_quant) == GRAPH_SUCCESS, - return *this, "operator set QuantizeFactorParams fail"); - return *this; -} - -graphStatus Operator::GetAttr(const string &name, UsrQuantizeFactorParams &attr_value) const { - GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return GRAPH_FAILED, "operator impl is nullptr, name %s.", name.c_str()); - QuantizeFactorParams def_quant; - GE_CHK_BOOL_EXEC(OpDescUtils::GetQuantizeFactorParams(operator_impl_->GetOpDescImpl(), def_quant) == GRAPH_SUCCESS, - return GRAPH_FAILED, "operator get QuantizeFactorParams fail"); - GE_CHK_BOOL_EXEC(TypeUtils::Def2UsrQuantizeFactorParams(def_quant, attr_value) == GRAPH_SUCCESS, return GRAPH_FAILED, - "trans para fail"); - return GRAPH_SUCCESS; -} - Operator &Operator::SetAttr(const string &name, ge::AttrValue &&attrValue) { GE_CHK_BOOL_EXEC(operator_impl_ != nullptr, return *this, "operator impl is nullptr."); (void)operator_impl_->SetAttr(name, std::move(attrValue.impl->geAttrValue_)); @@ -1099,7 +1118,6 @@ class GraphBuilderImpl { explicit GraphBuilderImpl(const string &name) : graph_(ComGraphMakeShared(name)) { if (graph_ == nullptr) { GELOGE(GRAPH_FAILED, "ComputeGraph make shared failed"); - graph_ = nullptr; return; } } diff --git a/src/common/graph/operator_factory_impl.cc b/src/common/graph/operator_factory_impl.cc index f9815968..fbfdafc3 100644 --- a/src/common/graph/operator_factory_impl.cc +++ b/src/common/graph/operator_factory_impl.cc @@ -15,7 +15,6 @@ */ #include "graph/operator_factory_impl.h" - #include "debug/ge_log.h" #include "framework/common/debug/ge_log.h" @@ -97,6 +96,7 @@ VerifyFunc OperatorFactoryImpl::GetVerifyFunc(const std::string &operator_type) graphStatus OperatorFactoryImpl::RegisterOperatorCreator(const string &operator_type, OpCreator const &op_creator) { if (operator_creators_ == nullptr) { + GELOGI("operator_creators_ init"); operator_creators_.reset(new (std::nothrow) std::map()); } auto it = operator_creators_->find(operator_type); diff --git a/src/common/graph/opsproto/opsproto_manager.cc b/src/common/graph/opsproto/opsproto_manager.cc index a5bdb4c5..2c9bf7d1 100644 --- a/src/common/graph/opsproto/opsproto_manager.cc +++ b/src/common/graph/opsproto/opsproto_manager.cc @@ -33,7 +33,9 @@ OpsProtoManager *OpsProtoManager::Instance() { } bool OpsProtoManager::Initialize(const std::map &options) { + /*lint -e1561*/ auto proto_iter = options.find("ge.opsProtoLibPath"); + /*lint +e1561*/ if (proto_iter == options.end()) { GELOGW("ge.opsProtoLibPath option not set, return."); return false; diff --git a/src/common/graph/option/ge_context.cc b/src/common/graph/option/ge_context.cc index ff9b0365..bbf501c7 100644 --- a/src/common/graph/option/ge_context.cc +++ b/src/common/graph/option/ge_context.cc @@ -21,6 +21,10 @@ #include "framework/common/debug/ge_log.h" namespace ge { +namespace { +const int64_t kMinTrainingTraceJobId = 256; +const int kDecimal = 10; +} // namespace GEContext &GetContext() { static GEContext ge_context{}; return ge_context; @@ -58,12 +62,21 @@ void GEContext::Init() { string job_id; (void)GetOption("ge.exec.jobId", job_id); - try { - job_id_ = static_cast(std::stoi(job_id.c_str())); - } catch (std::invalid_argument &) { - GELOGW("%s transform to int failed.", job_id.c_str()); - } catch (std::out_of_range &) { - GELOGW("%s transform to int failed.", job_id.c_str()); + std::string s_job_id = ""; + for (auto c : job_id) { + if (c >= '0' && c <= '9') { + s_job_id += c; + } + } + if (s_job_id == "") 
{ + trace_id_ = kMinTrainingTraceJobId; + return; + } + int64_t d_job_id = std::strtoll(s_job_id.c_str(), nullptr, kDecimal); + if (d_job_id < kMinTrainingTraceJobId) { + trace_id_ = d_job_id + kMinTrainingTraceJobId; + } else { + trace_id_ = d_job_id; } } @@ -71,7 +84,7 @@ uint64_t GEContext::SessionId() { return session_id_; } uint32_t GEContext::DeviceId() { return device_id_; } -uint64_t GEContext::JobId() { return job_id_; } +uint64_t GEContext::TraceId() { return trace_id_; } void GEContext::SetCtxDeviceId(uint32_t device_id) { device_id_ = device_id; } } // namespace ge diff --git a/src/common/graph/option/ge_local_context.cc b/src/common/graph/option/ge_local_context.cc index 2a5b7a34..82b1cb01 100644 --- a/src/common/graph/option/ge_local_context.cc +++ b/src/common/graph/option/ge_local_context.cc @@ -15,7 +15,6 @@ */ #include "./ge_local_context.h" - #include namespace ge { @@ -26,9 +25,14 @@ thread_local GEThreadLocalContext thread_context; GEThreadLocalContext &GetThreadLocalContext() { return thread_context; } graphStatus GEThreadLocalContext::GetOption(const string &key, string &option) { - auto iter = session_options_.find(key); - if (iter != session_options_.end()) { - option = iter->second; + auto graph_iter = graph_options_.find(key); + if (graph_iter != graph_options_.end()) { + option = graph_iter->second; + return GRAPH_SUCCESS; + } + auto session_iter = session_options_.find(key); + if (session_iter != session_options_.end()) { + option = session_iter->second; return GRAPH_SUCCESS; } auto global_iter = global_options_.find(key); @@ -48,4 +52,9 @@ void GEThreadLocalContext::SetSessionOption(map options_map) { session_options_.clear(); session_options_ = std::move(options_map); } + +void GEThreadLocalContext::SetGraphOption(map options_map) { + graph_options_.clear(); + graph_options_ = std::move(options_map); +} } // namespace ge diff --git a/src/common/graph/shape_refiner.cc b/src/common/graph/shape_refiner.cc index 5c976dce..72cdef02 100644 --- a/src/common/graph/shape_refiner.cc +++ b/src/common/graph/shape_refiner.cc @@ -40,7 +40,7 @@ void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::str return; } ge::OpDescPtr op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(GRAPH_FAILED, "op_desc is null."); return); + GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(GRAPH_FAILED, "op_desc is null."); return ); std::string str; if (!op_desc->GetAllInputsDescPtr().empty()) { std::string input_desc_str = "input shape: "; @@ -118,16 +118,16 @@ graphStatus ShapeRefiner::InferShapeAndType(const ConstNodePtr &node, Operator & InferenceContextPtr CreateInferenceContext(const std::unordered_map &context_map, const NodePtr &node) { - auto ctx = std::shared_ptr(new (std::nothrow) InferenceContext()); - if (ctx == nullptr) { - GELOGE(GRAPH_FAILED, "Failed to alloc InferenceContext"); - return nullptr; - } if (node == nullptr) { GELOGE(GRAPH_FAILED, "node is null"); return nullptr; } - InferenceContextPtr inference_context = std::shared_ptr(ctx); + InferenceContextPtr inference_context = std::shared_ptr(InferenceContext::Create()); + if (inference_context == nullptr) { + GELOGE(GRAPH_FAILED, "Failed to alloc InferenceContext"); + return nullptr; + } + auto all_in_data_anchors = node->GetAllInDataAnchors(); std::vector> input_shapes_and_types(all_in_data_anchors.size()); std::vector marks; @@ -169,9 +169,9 @@ InferenceContextPtr CreateInferenceContext(const std::unordered_mapSetInputHandleShapesAndTypes(std::move(input_shapes_and_types)); + 
inference_context->SetInputHandleShapesAndTypes(std::move(input_shapes_and_types)); } - ctx->SetMarks(marks); + inference_context->SetMarks(marks); return inference_context; } diff --git a/src/common/graph/tensor.cc b/src/common/graph/tensor.cc index 93e8b4ce..0be00988 100644 --- a/src/common/graph/tensor.cc +++ b/src/common/graph/tensor.cc @@ -15,7 +15,6 @@ */ #include "external/graph/tensor.h" - #include "debug/ge_util.h" #include "framework/common/debug/ge_log.h" #include "graph/ge_tensor.h" @@ -91,41 +90,72 @@ class TensorImpl { GeTensor ge_tensor; }; -Shape::Shape(const std::vector &dims) : dims_(dims) {} +class ShapeImpl { + public: + ShapeImpl() = default; + ~ShapeImpl() = default; + explicit ShapeImpl(const std::vector &dims) : dims_(dims) {} + + std::vector dims_; +}; + +Shape::Shape() { impl_ = ComGraphMakeShared(); } -size_t Shape::GetDimNum() const { return dims_.size(); } +Shape::Shape(const std::vector &dims) { impl_ = ComGraphMakeShared(dims); } + +size_t Shape::GetDimNum() const { + if (impl_ != nullptr) { + return impl_->dims_.size(); + } + return 0; +} int64_t Shape::GetDim(size_t idx) const { - if (idx >= dims_.size()) { - return 0; + if (impl_ != nullptr) { + if (idx >= impl_->dims_.size()) { + return 0; + } + return impl_->dims_[idx]; } - return dims_[idx]; + return 0; } graphStatus Shape::SetDim(size_t idx, int64_t value) { - if (idx >= dims_.size()) { - return GRAPH_FAILED; + if (impl_ != nullptr) { + if (idx >= impl_->dims_.size()) { + return GRAPH_FAILED; + } + impl_->dims_[idx] = value; + return GRAPH_SUCCESS; } - dims_[idx] = value; - return GRAPH_SUCCESS; + return GRAPH_FAILED; } -std::vector Shape::GetDims() const { return dims_; } +std::vector Shape::GetDims() const { + vector dims; + if (impl_ != nullptr) { + return impl_->dims_; + } + return dims; +} int64_t Shape::GetShapeSize() const { - if (dims_.empty()) { - return 0; - } - int64_t size = 1; - for (auto i : dims_) { - if (!Int64MulNotOverflow(size, i)) { - GELOGE(GRAPH_FAILED, "mul overflow: %ld, %ld", size, i); - size = 0; - return size; + if (impl_ != nullptr) { + if (impl_->dims_.empty()) { + return 0; } - size *= i; + int64_t size = 1; + for (auto i : impl_->dims_) { + if (!Int64MulNotOverflow(size, i)) { + GELOGE(GRAPH_FAILED, "mul overflow: %ld, %ld", size, i); + size = 0; + return size; + } + size *= i; + } + return size; } - return size; + return 0; } TensorDesc::TensorDesc() { impl = ComGraphMakeShared(); } @@ -486,6 +516,7 @@ graphStatus Tensor::IsValid() { GELOGW("mul overflow: %lu, %u", shape_size, type_length); } else { if (shape_size * type_length != data_size) { + // [Just log] Constructor GELOGW("tensor length not equal: shape_byte_size=%lu, data_size=%zu, dt_type=%s.", shape_size * type_length, data_size, TypeUtils::DataTypeToSerialString(data_type).c_str()); } diff --git a/src/common/graph/utils/anchor_utils.cc b/src/common/graph/utils/anchor_utils.cc index 3a284328..5a042283 100644 --- a/src/common/graph/utils/anchor_utils.cc +++ b/src/common/graph/utils/anchor_utils.cc @@ -15,9 +15,7 @@ */ #include "utils/anchor_utils.h" - #include - #include "debug/ge_util.h" #include "framework/common/debug/ge_log.h" diff --git a/src/common/graph/utils/ge_ir_utils.cc b/src/common/graph/utils/ge_ir_utils.cc index f796a906..0d22b615 100644 --- a/src/common/graph/utils/ge_ir_utils.cc +++ b/src/common/graph/utils/ge_ir_utils.cc @@ -32,12 +32,12 @@ const int64_t kDumpLevel = (kDumpGeGraph != nullptr) ? 
std::strtol(kDumpGeGraph, namespace ge { // Part 1: from IR convert to ONNX Protobuf static const std::map kGeDataTypeToOnnxMap = { - {DT_INT64, onnx::TensorProto_DataType_INT64}, {DT_UINT64, onnx::TensorProto_DataType_UINT64}, - {DT_FLOAT, onnx::TensorProto_DataType_FLOAT}, {DT_INT32, onnx::TensorProto_DataType_INT32}, - {DT_UINT32, onnx::TensorProto_DataType_UINT32}, {DT_INT8, onnx::TensorProto_DataType_INT8}, - {DT_UINT8, onnx::TensorProto_DataType_UINT8}, {DT_INT16, onnx::TensorProto_DataType_INT16}, - {DT_UINT16, onnx::TensorProto_DataType_UINT16}, {DT_FLOAT16, onnx::TensorProto_DataType_FLOAT16}, - {DT_DOUBLE, onnx::TensorProto_DataType_DOUBLE}, {DT_BOOL, onnx::TensorProto_DataType_BOOL}, + {DT_INT64, onnx::TensorProto_DataType_INT64}, {DT_UINT64, onnx::TensorProto_DataType_UINT64}, + {DT_FLOAT, onnx::TensorProto_DataType_FLOAT}, {DT_INT32, onnx::TensorProto_DataType_INT32}, + {DT_UINT32, onnx::TensorProto_DataType_UINT32}, {DT_INT8, onnx::TensorProto_DataType_INT8}, + {DT_UINT8, onnx::TensorProto_DataType_UINT8}, {DT_INT16, onnx::TensorProto_DataType_INT16}, + {DT_UINT16, onnx::TensorProto_DataType_UINT16}, {DT_FLOAT16, onnx::TensorProto_DataType_FLOAT16}, + {DT_DOUBLE, onnx::TensorProto_DataType_DOUBLE}, {DT_BOOL, onnx::TensorProto_DataType_BOOL}, }; onnx::TensorProto_DataType OnnxUtils::EncodeDataType(DataType data_type) { @@ -693,12 +693,12 @@ bool OnnxUtils::ConvertGeModelToModelProto(const ge::Model &model, onnx::ModelPr // Part 2: from ONNX Protobuf convert to IR static std::map onnxDataTypeToGeMap = { - {onnx::TensorProto_DataType_INT64, DT_INT64}, {onnx::TensorProto_DataType_UINT64, DT_UINT64}, - {onnx::TensorProto_DataType_FLOAT, DT_FLOAT}, {onnx::TensorProto_DataType_INT32, DT_INT32}, - {onnx::TensorProto_DataType_UINT32, DT_UINT32}, {onnx::TensorProto_DataType_INT8, DT_INT8}, - {onnx::TensorProto_DataType_UINT8, DT_UINT8}, {onnx::TensorProto_DataType_INT16, DT_INT16}, - {onnx::TensorProto_DataType_UINT16, DT_UINT16}, {onnx::TensorProto_DataType_FLOAT16, DT_FLOAT16}, - {onnx::TensorProto_DataType_DOUBLE, DT_DOUBLE}, {onnx::TensorProto_DataType_BOOL, DT_BOOL}, + {onnx::TensorProto_DataType_INT64, DT_INT64}, {onnx::TensorProto_DataType_UINT64, DT_UINT64}, + {onnx::TensorProto_DataType_FLOAT, DT_FLOAT}, {onnx::TensorProto_DataType_INT32, DT_INT32}, + {onnx::TensorProto_DataType_UINT32, DT_UINT32}, {onnx::TensorProto_DataType_INT8, DT_INT8}, + {onnx::TensorProto_DataType_UINT8, DT_UINT8}, {onnx::TensorProto_DataType_INT16, DT_INT16}, + {onnx::TensorProto_DataType_UINT16, DT_UINT16}, {onnx::TensorProto_DataType_FLOAT16, DT_FLOAT16}, + {onnx::TensorProto_DataType_DOUBLE, DT_DOUBLE}, {onnx::TensorProto_DataType_BOOL, DT_BOOL}, }; ge::DataType OnnxUtils::DecodeDataType(onnx::TensorProto_DataType data_type) { @@ -949,7 +949,7 @@ bool OnnxUtils::DecodeNodeDesc(const onnx::NodeProto *node_proto, OpDescPtr &op_ auto size_out = attr.i(); for (int64_t i = 0; i < size_out; i++) { GeTensorDesc ge_tensor_desc; - if (op_desc->AddOutputDesc(ge_tensor_desc) != GRAPH_SUCCESS) { + if (op_desc->AddInputDesc(ge_tensor_desc) != GRAPH_SUCCESS) { GELOGW("add inputdesc failed"); continue; } diff --git a/src/common/graph/utils/graph_utils.cc b/src/common/graph/utils/graph_utils.cc index 4852ba2e..adb36db9 100644 --- a/src/common/graph/utils/graph_utils.cc +++ b/src/common/graph/utils/graph_utils.cc @@ -176,7 +176,7 @@ graphStatus GraphUtils::ReplaceEdgeDst(const OutControlAnchorPtr &src, const InC } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus GraphUtils::InsertNodeBetweenDataAnchors( - 
const OutDataAnchorPtr &src, const InDataAnchorPtr &dst, const NodePtr &new_node) { + const OutDataAnchorPtr &src, const InDataAnchorPtr &dst, const NodePtr &new_node) { GE_CHECK_NOTNULL(src); GE_CHECK_NOTNULL(dst); GE_CHECK_NOTNULL(new_node); @@ -213,10 +213,10 @@ GraphUtils::RemoveNodeWithoutRelink(const ComputeGraphPtr &compute_graph, const /// Add two edges to the new node, respectively connecting the SRC and DST /// associated with the original edge -/// A ---> B transferred to A ---> N ---> B +/// A ---> B transfered to A ---> N ---> B graphStatus InsertTransNode(ComputeGraph &compute_graph, const InDataAnchorPtr &in_data_anchor, const std::vector &vec_op_desc) { - for (auto &op_desc : vec_op_desc) { + for (const auto &op_desc : vec_op_desc) { GE_CHECK_NOTNULL(op_desc); auto ret = op_desc->AddInputDesc(GeTensorDesc()); @@ -275,9 +275,11 @@ graphStatus InsertTransNode(ComputeGraph &compute_graph, const InDataAnchorPtr & int64_t output_format = 0; if (!AttrUtils::GetInt(op_desc, "input_format", input_format)) { GELOGW("get attr input_format failed"); + continue; } if (!AttrUtils::GetInt(op_desc, "output_format", output_format)) { GELOGW("get attr output_format failed"); + continue; } GE_CHECK_NOTNULL(node_to_insert->GetInDataAnchor(0)->GetPeerOutAnchor()); @@ -299,11 +301,11 @@ graphStatus InsertTransNode(ComputeGraph &compute_graph, const InDataAnchorPtr & } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus GraphUtils::InsertTransNode( - ComputeGraphPtr compute_graph, const InDataAnchorPtr &in_data_anchor, const std::vector &vec_op_desc) { + ComputeGraphPtr compute_graph, const InDataAnchorPtr &in_data_anchor, const std::vector &vec_op_desc) { GE_CHECK_NOTNULL(compute_graph); GE_CHECK_NOTNULL(in_data_anchor); graphStatus ret = - ge::InsertTransNode(*compute_graph, in_data_anchor, vec_op_desc) == GRAPH_SUCCESS ? GRAPH_SUCCESS : GRAPH_FAILED; + ge::InsertTransNode(*compute_graph, in_data_anchor, vec_op_desc) == GRAPH_SUCCESS ? GRAPH_SUCCESS : GRAPH_FAILED; return ret; } @@ -335,6 +337,10 @@ void GraphUtils::RecordOriginalNames(std::vector original_nodes, co for (const auto &node_tmp : original_nodes) { std::vector names_tmp; ge::OpDescPtr opdesc_tmp = node_tmp->GetOpDesc(); + if (opdesc_tmp == nullptr) { + GELOGE(GRAPH_FAILED, "Node %s get opdesc is nullptr", node_tmp->GetName().c_str()); + continue; + } (void)ge::AttrUtils::GetListStr(opdesc_tmp, "original_op_names", names_tmp); if (names_tmp.size() != 0) { original_names.insert(original_names.end(), names_tmp.begin(), names_tmp.end()); @@ -355,7 +361,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY void GraphUtils::RecordOriginalNa GE_CHK_BOOL_EXEC(node != nullptr, return, "node is null."); std::vector original_names; if (names_tmp.size() != 0) { - original_names.insert(original_names.end(), names_tmp.begin(), names_tmp.end()); + (void)original_names.insert(original_names.end(), names_tmp.begin(), names_tmp.end()); } else { std::string tmp; original_names.push_back(tmp); @@ -367,7 +373,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY void GraphUtils::RecordOriginalNa // Check global_step Node has IsVariable and Read. 
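The RecordOriginalNames hunk above now checks each node's OpDesc for null before dereferencing it, logging and skipping instead of crashing. A minimal sketch of this guard-and-continue traversal — Node, OpDesc, and the container are stand-ins, not the real GraphEngine types:

    // guard_and_continue.cc -- defensive traversal sketch (hypothetical types)
    #include <cstdio>
    #include <memory>
    #include <vector>

    struct OpDesc { const char *name = "op"; };
    struct Node {
      std::shared_ptr<OpDesc> desc;
      std::shared_ptr<OpDesc> GetOpDesc() const { return desc; }
    };

    void CollectNames(const std::vector<Node> &nodes, std::vector<const char *> &names) {
      for (const auto &node : nodes) {
        auto op_desc = node.GetOpDesc();
        if (op_desc == nullptr) {          // log-and-skip rather than crash on a malformed node
          std::fprintf(stderr, "node has no opdesc, skipped\n");
          continue;
        }
        names.push_back(op_desc->name);
      }
    }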
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool GraphUtils::CheckGlobalStepNode(const ge::NodePtr &node) { GE_CHK_BOOL_EXEC( - node != nullptr, { return false; }, "node is null."); + node != nullptr, { return false; }, "node is null."); bool has_variable = false; bool has_cond_read = false; for (const auto &out : node->GetOutDataNodes()) { @@ -382,21 +388,22 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool GraphUtils::CheckGlobalStepN // Check origin ComputeGraph is TrainGraph. GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool GraphUtils::CheckIsTrainGraph( - const ge::ComputeGraphPtr &compute_graph) { + const ge::ComputeGraphPtr &compute_graph) { GE_CHK_BOOL_EXEC( - compute_graph != nullptr, { return false; }, "compute_graph is nullptr"); + compute_graph != nullptr, { return false; }, "compute_graph is nullptr"); bool is_iterator_v2 = false; bool is_train_graph = false; for (const auto &node : compute_graph->GetDirectNode()) { - if (node->GetType() == "ApplyMomentum") { + if ((node->GetType() == "ApplyMomentum") || (node->GetType() == "ApplyGradientDescent")) { + GELOGI("graph needs iteration."); return true; } // Check global_step has IsVariable and Read. if ((node->GetType() == "Variable") && (node->GetName() == "global_step")) { is_train_graph = CheckGlobalStepNode(node); } else if ((node->GetType() == "FrameworkOp") && (node->GetName() == "IteratorGetNext")) { - // Train Graph must has GetNext. + // Train Graph must have GetNext. is_iterator_v2 = true; } if (is_iterator_v2 && is_train_graph) { @@ -410,7 +417,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool GraphUtils::CheckIsTrainGrap GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool GraphUtils::MatchDumpStr(const std::string &suffix) { char *dump_level = std::getenv(kDumpGraphLevel); int64_t dump_graph_level = - (dump_level != nullptr) ? std::strtol(dump_level, nullptr, kBaseOfIntegerValue) : kDumpLevel2; + (dump_level != nullptr) ? std::strtol(dump_level, nullptr, kBaseOfIntegerValue) : kDumpLevel2; + if (dump_graph_level == kDumpLevel1) { return false; } @@ -499,6 +507,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool GraphUtils::LoadGEGraph(cons ge::Model model; // Get Model object from ModelDef by deserialize ModelDef if (model.Load(model_def) == GRAPH_SUCCESS) { + GE_CHK_BOOL_EXEC(GraphUtils::GetComputeGraph(model.GetGraph()) != nullptr, return false, + "Get computer graph is nullptr"); compute_graph = *(GraphUtils::GetComputeGraph(model.GetGraph())); return true; } else { @@ -509,7 +519,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool GraphUtils::LoadGEGraph(cons // Printing protocol messages in text format is useful for debugging and human editing of messages. GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY void GraphUtils::WriteProtoToTextFile( - const google::protobuf::Message &proto, const char *real_path) { + const google::protobuf::Message &proto, const char *real_path) { #ifdef FMK_SUPPORT_DUMP const int FILE_AUTHORITY = 0600; int fd = open(real_path, O_WRONLY | O_CREAT | O_TRUNC, FILE_AUTHORITY); @@ -563,7 +573,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY void GraphUtils::WriteProtoToText } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool GraphUtils::ReadProtoFromTextFile( - const char *file, google::protobuf::Message *proto) { + const char *file, google::protobuf::Message *proto) { if (file == nullptr || proto == nullptr) { GELOGE(GRAPH_FAILED, "incorrect parameter. 
file path or message is invalid"); return false; @@ -587,7 +597,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY void GraphUtils::DumpGEGraphToOnn #ifdef FMK_SUPPORT_DUMP char *dump_ge_graph = std::getenv(kDumpGeGraph); int64_t dump_ge_graph_level = - (dump_ge_graph != nullptr) ? std::strtol(dump_ge_graph, nullptr, kBaseOfIntegerValue) : OnnxUtils::NO_DUMP; + (dump_ge_graph != nullptr) ? std::strtol(dump_ge_graph, nullptr, kBaseOfIntegerValue) : OnnxUtils::NO_DUMP; if ((dump_ge_graph_level == OnnxUtils::NO_DUMP) || (dump_ge_graph_level >= OnnxUtils::DUMP_LEVEL_END)) { GELOGD("Skip DumpGEGraphToOnnx with dump_ge_graph_level %ld.", dump_ge_graph_level); return; @@ -1029,8 +1039,8 @@ GraphUtils::ReplaceNodeAnchors(const NodePtr &new_node, const NodePtr &old_node, } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus GraphUtils::ReplaceNodeAnchors( - const NodePtr &new_node, const NodePtr &old_node, const std::initializer_list inputs_map, - const std::initializer_list outputs_map) { + const NodePtr &new_node, const NodePtr &old_node, const std::initializer_list inputs_map, + const std::initializer_list outputs_map) { return ReplaceNodeAnchors(new_node, old_node, std::vector(inputs_map), std::vector(outputs_map)); } diff --git a/src/common/graph/utils/node_utils.cc b/src/common/graph/utils/node_utils.cc index d3ec3db8..ae87435c 100644 --- a/src/common/graph/utils/node_utils.cc +++ b/src/common/graph/utils/node_utils.cc @@ -15,7 +15,6 @@ */ #include "utils/node_utils.h" - #include "debug/ge_op_types.h" #include "debug/ge_util.h" #include "framework/common/debug/ge_log.h" @@ -86,6 +85,7 @@ graphStatus NodeUtils::GetSingleOutputNodeOfNthLayer(const NodePtr &src, int dep return GRAPH_FAILED; } cur_ptr = src->GetOutDataNodes().at(0); + GE_CHECK_NOTNULL(cur_ptr); } dst = cur_ptr; return GRAPH_SUCCESS; @@ -289,8 +289,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer auto peer_op_desc = peer_anchor->GetOwnerNode()->GetOpDesc(); GE_IF_BOOL_EXEC(peer_op_desc == nullptr, GELOGE(GRAPH_FAILED, "peer opdesc is null"); continue); GE_IF_BOOL_EXEC(peer_op_desc->UpdateInputDesc(peer_anchor->GetIdx(), output_tensor) != GRAPH_SUCCESS, - GELOGE(GRAPH_FAILED, "peer opdesc is null"); - continue); + GELOGE(GRAPH_FAILED, "peer opdesc is null"); + continue); } } return GRAPH_SUCCESS; @@ -309,7 +309,7 @@ bool NodeUtils::IsInNodesEmpty(const Node &node) { if ((node.in_control_anchor_ != nullptr) && (!node.in_control_anchor_->IsPeerOutAnchorsEmpty())) { auto peer_out_control_anchors = node.in_control_anchor_->GetPeerOutControlAnchors(); - for (auto &out_control_anchor : peer_out_control_anchors) { + for (const auto &out_control_anchor : peer_out_control_anchors) { if (out_control_anchor != nullptr) { if (out_control_anchor->GetOwnerNode() != nullptr) { return false; diff --git a/src/common/graph/utils/op_desc_utils.cc b/src/common/graph/utils/op_desc_utils.cc index 5cf9353f..57f7cdee 100644 --- a/src/common/graph/utils/op_desc_utils.cc +++ b/src/common/graph/utils/op_desc_utils.cc @@ -30,6 +30,7 @@ using std::vector; +/*lint -e512 -e737 -e752*/ namespace ge { const char OP_DESC_QUANT_PARAMS[] = "quantize_factor"; static const int CONST_OP_NORMAL_WEIGHT_SIZE = 1; @@ -134,11 +135,11 @@ graphStatus OpDescUtils::GetQuantizeFactorParams(const OpDesc &op_desc, Quantize GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus OpDescUtils::SetQuantizeFactorParams(const OpDescPtr &op_desc, const QuantizeFactorParams &quant) { GE_CHK_BOOL_EXEC_INFO(op_desc != nullptr, return 
GRAPH_FAILED, "op_desc is nullptr"); - return op_desc->SetAttr(OP_DESC_QUANT_PARAMS, GeAttrValue::CreateFrom(quant)); + return op_desc->SetAttr(OP_DESC_QUANT_PARAMS, GeAttrValue::CreateFrom(quant)); // lint !e732 } graphStatus OpDescUtils::SetQuantizeFactorParams(OpDesc &op_desc, const QuantizeFactorParams &quant) { - return op_desc.SetAttr(OP_DESC_QUANT_PARAMS, GeAttrValue::CreateFrom(quant)); + return op_desc.SetAttr(OP_DESC_QUANT_PARAMS, GeAttrValue::CreateFrom(quant)); // lint !e732 } GeTensorPtr OpDescUtils::MutableWeights(OpDesc &op_desc) { @@ -163,7 +164,7 @@ graphStatus OpDescUtils::SetWeights(OpDesc &op_desc, const GeTensorPtr weight) { GELOGE(GRAPH_FAILED, "weight is null"); return GRAPH_FAILED; } - return AttrUtils::SetTensor(&op_desc, ATTR_NAME_WEIGHTS, weight) ? GRAPH_SUCCESS : GRAPH_FAILED; + return AttrUtils::SetTensor(&op_desc, ATTR_NAME_WEIGHTS, weight) ? GRAPH_SUCCESS : GRAPH_FAILED; // lint !e737 } graphStatus OpDescUtils::SetWeights(OpDescPtr op_desc, const GeTensorPtr weight) { @@ -180,7 +181,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector OpDescUt } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector OpDescUtils::GetWeights( - const ge::ConstNodePtr &node) { + const ge::ConstNodePtr &node) { if (node == nullptr) { return vector(); } @@ -188,7 +189,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector OpDescUt } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector OpDescUtils::GetConstInputNode( - const ge::Node &node) { + const ge::Node &node) { vector ret; auto in_anchors = node.GetAllInDataAnchors(); for (const auto &in_anchor : in_anchors) { @@ -207,7 +208,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector OpDescUtils:: } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector OpDescUtils::GetInputData( - const vector &input_nodes) { + const vector &input_nodes) { vector ret; for (const auto &input_node : input_nodes) { auto temp_weight = MutableWeights(input_node->GetOpDesc()); @@ -229,12 +230,12 @@ size_t OpDescUtils::GetNonConstInputsSize(const ge::Node &node) { continue; } } - return input_num; + return input_num; // lint !e712 } else { GE_IF_BOOL_EXEC( - node.GetInDataNodes().size() < GetConstInputs(node).size(), - GELOGE(GRAPH_FAILED, "%zu is smaller than %zu", node.GetInDataNodes().size(), GetConstInputs(node).size()); - return 0); + node.GetInDataNodes().size() < GetConstInputs(node).size(), + GELOGE(GRAPH_FAILED, "%zu is smaller than %zu", node.GetInDataNodes().size(), GetConstInputs(node).size()); + return 0); return node.GetInDataNodes().size() - GetConstInputs(node).size(); } } @@ -334,7 +335,7 @@ bool OpDescUtils::IsNonConstInput(const ge::Node &node, const size_t index) { bool ret = false; if (index < node.GetAllInDataAnchors().size()) { if (NodeUtils::IsAnchorStatusSet(node)) { - ret = (ge::AnchorUtils::GetStatus(node.GetInDataAnchor(static_cast(index))) == ANCHOR_DATA); + ret = (ge::AnchorUtils::GetStatus(node.GetInDataAnchor(static_cast(index))) == ANCHOR_DATA); // lint !e712 } else { for (const auto &anchor : node.GetAllInDataAnchors()) { if (anchor->GetIdx() != static_cast(index)) { @@ -363,13 +364,15 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDescUtils::IsNonConstInput } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector OpDescUtils::GetConstInputs( - const ge::ConstNodePtr &node) { - if (node == nullptr) { return vector(); } + const ge::ConstNodePtr &node) { + if (node == nullptr) { + return vector(); + } return GetConstInputs(*node); } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector 
OpDescUtils::GetNonConstTensorDesc( - const ge::ConstNodePtr &node) { + const ge::ConstNodePtr &node) { if (node == nullptr || node->GetOpDesc() == nullptr) { return vector(); } @@ -377,7 +380,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector OpDescUt if (NodeUtils::IsAnchorStatusSet(*node)) { for (const auto &in_anchor : node->GetAllInDataAnchors()) { if (ge::AnchorUtils::GetStatus(in_anchor) == ANCHOR_DATA) { - ret.push_back(node->GetOpDesc()->GetInputDesc(in_anchor->GetIdx())); + (void)ret.push_back(node->GetOpDesc()->GetInputDesc(in_anchor->GetIdx())); } } } else { @@ -387,7 +390,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector OpDescUt continue; } if (out_anchor->GetOwnerNode()->GetOpDesc()->GetType() != CONSTANT) { - ret.push_back(node->GetOpDesc()->GetInputDesc(in_anchor->GetIdx())); + (void)ret.push_back(node->GetOpDesc()->GetInputDesc(in_anchor->GetIdx())); } } } @@ -571,3 +574,4 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus OpDescUtils::ClearWei return GRAPH_SUCCESS; } } // namespace ge +/*lint +e512 +e737 +e752*/ diff --git a/src/common/graph/utils/string_utils.h b/src/common/graph/utils/string_utils.h index 79973d33..a9700469 100644 --- a/src/common/graph/utils/string_utils.h +++ b/src/common/graph/utils/string_utils.h @@ -22,7 +22,6 @@ #include #include #include - #include "securec.h" namespace ge { diff --git a/src/ge/CMakeLists.txt b/src/ge/CMakeLists.txt index 0b0c688c..6fc26780 100755 --- a/src/ge/CMakeLists.txt +++ b/src/ge/CMakeLists.txt @@ -260,7 +260,6 @@ target_link_libraries(ge_train ${hccl} ${msprof} ${runtime} - ${cce} ${resouce} rt dl) @@ -468,7 +467,6 @@ target_link_libraries(ge ${mmpa} ${msprof} ${runtime} - ${cce} ${resouce} rt dl) diff --git a/src/ge/client/CMakeLists.txt b/src/ge/client/CMakeLists.txt index 3f034c9c..353d62fe 100755 --- a/src/ge/client/CMakeLists.txt +++ b/src/ge/client/CMakeLists.txt @@ -47,8 +47,6 @@ include_directories(${GE_SOURCE_DIR}/inc/graph) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) include_directories(${GE_SOURCE_DIR}/third_party/securec/include) -include_directories(${GE_SOURCE_DIR}/third_party/json/include) -include_directories(${GE_SOURCE_DIR}/third_party/protobuf/src) include_directories(${CMAKE_BINARY_DIR}) include_directories(${CMAKE_BINARY_DIR}/proto/ge) @@ -70,7 +68,6 @@ target_link_libraries(ge_client_train ${slog} ${mmpa} ${runtime} - ${cce} rt dl) @@ -91,6 +88,5 @@ target_link_libraries(ge_client ${slog} ${mmpa} ${runtime} - ${cce} rt dl) diff --git a/src/ge/client/ge_api.cc b/src/ge/client/ge_api.cc index a2d81695..5b42c9db 100644 --- a/src/ge/client/ge_api.cc +++ b/src/ge/client/ge_api.cc @@ -15,22 +15,19 @@ */ #include "ge/ge_api.h" - #include - #include "common/debug/log.h" -#include "common/ge/datatype_util.h" -#include "common/ge/tbe_plugin_manager.h" #include "framework/common/debug/ge_log.h" -#include "graph/detail/model_serialize_imp.h" +#include "common/ge/datatype_util.h" +#include "proto/ge_api.pb.h" #include "graph/model_serialize.h" -#include "graph/opsproto_manager.h" +#include "graph/detail/model_serialize_imp.h" #include "graph/utils/tensor_adapter.h" -#include "graph/utils/type_utils.h" #include "init/gelib.h" -#include "proto/ge_api.pb.h" -#include "register/op_registry.h" #include "session/session_manager.h" +#include "graph/opsproto_manager.h" +#include "graph/utils/type_utils.h" +#include "register/op_registry.h" using domi::GetContext; using domi::OpRegistry; @@ -102,6 +99,20 
@@ Status CheckOptionsValid(const std::map &options) { return SUCCESS; } +void SaveDdkVersion(const std::map &options) { + auto ddk_option = options.find(DDK_VERSION_FLAG); + if (ddk_option != options.end()) { + auto ddk_version = ddk_option->second; + if (!ddk_version.empty()) { + GELOGI("Input ddk version : %s.", ddk_version.c_str()); + domi::GetContext().ddk_version = ddk_version; + } + } else { + GELOGW("No ddkVersion!"); + return; + } +} + // Initialize GE, prepare for execution, call GELib::Initialize Status GEInitialize(const std::map &options) { GELOGT(TRACE_INIT, "GEInitialize start"); @@ -127,7 +138,8 @@ Status GEInitialize(const std::map &options) { return FAILED; } - TBEPluginManager::Instance().InitPreparation(options); + SaveDdkVersion(options); + // call Initialize GELOGT(TRACE_RUNNING, "Initializing environment"); Status ret = ge::GELib::Initialize(options); @@ -169,7 +181,7 @@ Status GEFinalize() { GELOGE(ret, "GEFinalize Failed"); return FAILED; } - TBEPluginManager::Instance().Finalize(); + if (kGeInitialized && ret == SUCCESS) { kGeInitialized = false; } @@ -246,20 +258,24 @@ Session::~Session() { } Status Session::AddGraph(uint32_t graph_id, const Graph &graph) { - GELOGT(TRACE_INIT, "Session AddGraph start"); + std::map options; + return AddGraph(graph_id, graph, options); +} + +Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map &options) { + GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_); std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if (!instance_ptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Session AddGraph failed"); + if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session."); return FAILED; } - - GELOGT(TRACE_RUNNING, "Adding Graph to session"); - Status ret = instance_ptr->SessionManagerObj().AddGraph(sessionId_, graph_id, graph); + GELOGD("Adding graph to session"); + Status ret = instance_ptr->SessionManagerObj().AddGraph(sessionId_, graph_id, graph, options); if (ret != SUCCESS) { - GELOGE(ret, "AddGraph failed in Session."); + GELOGE(ret, "AddGraph failed in Session."); return FAILED; } - GELOGT(TRACE_STOP, "Session AddGraph finished"); + GELOGD("AddGraph finished in Session."); return ret; } diff --git a/src/ge/common/CMakeLists.txt b/src/ge/common/CMakeLists.txt index b0eb4ffc..56a40b78 100755 --- a/src/ge/common/CMakeLists.txt +++ b/src/ge/common/CMakeLists.txt @@ -59,6 +59,9 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "types.cc" "util.cc" "model_saver.cc" +# new files, possibly to be deleted?
+ "op/attr_value_util.cc" + "op/ge_op_utils.cc" ) ge_protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) @@ -75,9 +78,6 @@ include_directories(${GE_SOURCE_DIR}/inc/graph) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) include_directories(${GE_SOURCE_DIR}/third_party/securec/include) -include_directories(${GE_SOURCE_DIR}/third_party/json/include) -include_directories(${GE_SOURCE_DIR}/third_party/eigen) -include_directories(${GE_SOURCE_DIR}/third_party/protobuf/src) include_directories(${CMAKE_BINARY_DIR}) include_directories(${CMAKE_BINARY_DIR}/proto/ge) @@ -94,7 +94,6 @@ target_link_libraries(ge_common ${c_sec} ${slog} ${mmpa} - ${cce} ${resource} rt dl) diff --git a/src/ge/common/auth/file_saver.cc b/src/ge/common/auth/file_saver.cc index cd28d917..fde9ce87 100644 --- a/src/ge/common/auth/file_saver.cc +++ b/src/ge/common/auth/file_saver.cc @@ -60,10 +60,10 @@ Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(size == 0 || data == nullptr, return PARAM_INVALID); // Write data - mmSsize_t write_count = mmWrite(fd, const_cast(data), size); + int32_t write_count = mmWrite(fd, const_cast(data), size); // -1: Failed to write to file; - 2: Illegal parameter if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) { - GELOGE(FAILED, "Write data failed. mmpa_errorno = %ld", write_count); + GELOGE(FAILED, "Write data failed. mmpa_errorno = %d", write_count); return FAILED; } @@ -102,9 +102,9 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi ModelPartitionTable &model_partition_table, const std::vector &partition_datas) { GE_CHK_BOOL_RET_STATUS( - !partition_datas.empty() && model_partition_table.num != 0 && model_partition_table.num == partition_datas.size(), - FAILED, "Invalid param:partition data size(%u), model_partition_table.num(%zu).", model_partition_table.num, - partition_datas.size()); + !partition_datas.empty() && model_partition_table.num != 0 && model_partition_table.num == partition_datas.size(), + FAILED, "Invalid param:partition data size(%u), model_partition_table.num(%zu).", model_partition_table.num, + partition_datas.size()); // Open file int32_t fd = 0; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(OpenFile(fd, file_path) != SUCCESS, return FAILED); @@ -112,17 +112,16 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi do { // Write file header GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - WriteData(static_cast(&file_header), sizeof(ModelFileHeader), fd) != SUCCESS, ret = FAILED; - break); + WriteData(static_cast(&file_header), sizeof(ModelFileHeader), fd) != SUCCESS, ret = FAILED; break); // Write model partition table uint32_t table_size = static_cast(SIZE_OF_MODEL_PARTITION_TABLE(model_partition_table)); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - WriteData(static_cast(&model_partition_table), table_size, fd) != SUCCESS, ret = FAILED; break); + WriteData(static_cast(&model_partition_table), table_size, fd) != SUCCESS, ret = FAILED; break); // Write partition data for (const auto &partition_data : partition_datas) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - WriteData(static_cast(partition_data.data), partition_data.size, fd) != SUCCESS, ret = FAILED; - break); + WriteData(static_cast(partition_data.data), partition_data.size, fd) != SUCCESS, ret = FAILED; + break); } } while (0); // Close file diff --git a/src/ge/common/auth/file_saver.h b/src/ge/common/auth/file_saver.h index e382970a..b17c197d 100644 
--- a/src/ge/common/auth/file_saver.h +++ b/src/ge/common/auth/file_saver.h @@ -28,23 +28,23 @@ struct PROC_PARAM { uint8_t *model_name; - // ISV Ek buffer + /* ISV Ek buffer */ uint8_t *model_key; uint32_t model_key_len; - // ISV root certificate buffer + /* ISV root certificate buffer */ uint8_t *root_cert; uint32_t root_cert_len; - // ISV private key buffer + /* ISV private key buffer */ uint8_t *pri_key; uint32_t pri_key_len; - // Raw AI Module Image buffer + /* Raw AI Module Image buffer */ uint8_t *ai_image; uint32_t ai_image_len; - // ISV HW key buffer + /* ISV HW key buffer */ uint8_t *hw_key; uint32_t hw_key_len; }; @@ -61,11 +61,11 @@ using std::string; class FileSaver { public: - /// - /// @ingroup domi_common - /// @brief save model, no encryption - /// @return Status result - /// + /** + * @ingroup domi_common + * @brief save model, no encryption + * @return Status result + */ static Status SaveToFile(const string &file_path, const ge::ModelData &model, const ModelFileHeader *model_file_header = nullptr); @@ -74,26 +74,26 @@ class FileSaver { const std::vector &partition_datas); protected: - /// - /// @ingroup domi_common - /// @brief Check validity of the file path - /// @return Status result - /// + /** + * @ingroup domi_common + * @brief Check validity of the file path + * @return Status result + */ static Status CheckPath(const string &file_path); static Status WriteData(const void *data, uint32_t size, int32_t fd); static Status OpenFile(int32_t &fd, const std::string &file_path); - /// - /// @ingroup domi_common - /// @brief save model to file - /// @param [in] file_path file output path - /// @param [in] file_header file header info - /// @param [in] data model data - /// @param [in] len model length - /// @return Status result - /// + /** + * @ingroup domi_common + * @brief save model to file + * @param [in] file_path file output path + * @param [in] file_header file header info + * @param [in] data model data + * @param [in] len model length + * @return Status result + */ static Status SaveWithFileHeader(const string &file_path, const ModelFileHeader &file_header, const void *data, int len); diff --git a/src/ge/common/convert/pb2json.cc b/src/ge/common/convert/pb2json.cc index f47621b8..2c35a856 100644 --- a/src/ge/common/convert/pb2json.cc +++ b/src/ge/common/convert/pb2json.cc @@ -18,7 +18,6 @@ // Description: This imply file for protobuf message and json interconversion #include "common/convert/pb2json.h" - #include #include @@ -130,7 +129,7 @@ void Pb2Json::OneField2Json(const ProtobufMsg &message, const ProtobufFieldDescr void Pb2Json::RepeatedMessage2Json(const ProtobufMsg &message, const ProtobufFieldDescriptor *field, const ProtobufReflection *reflection, const set &black_fields, Json &json, bool enum2str) { - if (field == nullptr || reflection == nullptr) { + if (nullptr == field || nullptr == reflection) { Message2Json(message, black_fields, json); return; } diff --git a/src/ge/common/convert/pb2json.h b/src/ge/common/convert/pb2json.h index 4048708d..3f4fe84c 100644 --- a/src/ge/common/convert/pb2json.h +++ b/src/ge/common/convert/pb2json.h @@ -19,12 +19,10 @@ #ifndef GE_COMMON_CONVERT_PB2JSON_H_ #define GE_COMMON_CONVERT_PB2JSON_H_ - #include #include #include #include - #include "google/protobuf/descriptor.h" #include "google/protobuf/message.h" #include "nlohmann/json.hpp" @@ -40,12 +38,12 @@ using ProtobufEnumValueDescriptor = ::google::protobuf::EnumValueDescriptor; class Pb2Json { public: /** - * @ingroup domi_omg - * @brief Transfer protobuf object 
to JSON object - * @param [out] json Converted JSON object - * @return void success - * @author - */ + * @ingroup domi_omg + * @brief Transfer protobuf object to JSON object + * @param [out] json Converted JSON object + * @return void success + * @author + */ static void Message2Json(const ProtobufMsg &message, const std::set &black_fields, Json &json, bool enum2str = false); diff --git a/src/ge/common/debug/memory_dumper.cc b/src/ge/common/debug/memory_dumper.cc old mode 100755 new mode 100644 index fcda5366..67a615f4 --- a/src/ge/common/debug/memory_dumper.cc +++ b/src/ge/common/debug/memory_dumper.cc @@ -21,10 +21,10 @@ #include #include -#include "framework/common/debug/ge_log.h" #include "framework/common/debug/log.h" -#include "framework/common/ge_inner_error_codes.h" +#include "framework/common/debug/ge_log.h" #include "framework/common/util.h" +#include "framework/common/ge_inner_error_codes.h" using std::string; @@ -51,10 +51,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status MemoryDumper::DumpToFile // Write the data to the file Status ret = SUCCESS; - mmSsize_t mmpa_ret = mmWrite(fd, data, len); + int32_t mmpa_ret = mmWrite(fd, data, len); // mmWrite return -1:Failed to write data to file;return -2:Invalid parameter if (mmpa_ret == EN_ERROR || mmpa_ret == EN_INVALID_PARAM) { - GELOGE(FAILED, "Write to file failed. errno = %ld", mmpa_ret); + GELOGE(FAILED, "Write to file failed. errno = %d", mmpa_ret); ret = FAILED; } @@ -99,10 +99,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status MemoryDumper::Dump(void GE_CHK_BOOL_RET_STATUS(data != nullptr, FAILED, "Incorrect parameter. data is nullptr"); #ifdef FMK_SUPPORT_DUMP - mmSsize_t mmpa_ret = mmWrite(fd_, data, len); + int32_t mmpa_ret = mmWrite(fd_, data, len); // mmWrite return -1:failed to write data to file;return -2:invalid parameter if (mmpa_ret == EN_ERROR || mmpa_ret == EN_INVALID_PARAM) { - GELOGE(FAILED, "Write to file failed. errno = %ld", mmpa_ret); + GELOGE(FAILED, "Write to file failed. 
errno = %d", mmpa_ret); return FAILED; } @@ -136,18 +136,18 @@ int MemoryDumper::OpenFile(const char *filename) { string real_path; char tmp_path[PATH_MAX] = {0}; GE_IF_BOOL_EXEC( - -1 != path_split_pos, string prefix_path = std::string(filename).substr(0, path_split_pos); - string last_path = std::string(filename).substr(path_split_pos, strlen(filename) - 1); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(prefix_path.length() >= PATH_MAX, return kInvalidFd, "Prefix path is too long!"); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(realpath(prefix_path.c_str(), tmp_path) == nullptr, return kInvalidFd, - "Dir %s does not exit.", prefix_path.c_str()); - real_path = std::string(tmp_path) + last_path;) + -1 != path_split_pos, string prefix_path = std::string(filename).substr(0, path_split_pos); + string last_path = std::string(filename).substr(path_split_pos, strlen(filename) - 1); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(prefix_path.length() >= PATH_MAX, return kInvalidFd, "Prefix path is too long!"); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(realpath(prefix_path.c_str(), tmp_path) == nullptr, return kInvalidFd, + "Dir %s does not exit.", prefix_path.c_str()); + real_path = std::string(tmp_path) + last_path;) GE_IF_BOOL_EXEC( - path_split_pos == -1 || path_split_pos == 0, - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(filename) >= PATH_MAX, return kInvalidFd, "Prefix path is too long!"); - GE_IF_BOOL_EXEC(realpath(filename, tmp_path) == nullptr, - GELOGI("File %s does not exit, it will be created.", filename)); - real_path = std::string(tmp_path);) + path_split_pos == -1 || path_split_pos == 0, + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(filename) >= PATH_MAX, return kInvalidFd, "Prefix path is too long!"); + GE_IF_BOOL_EXEC(realpath(filename, tmp_path) == nullptr, + GELOGI("File %s does not exit, it will be created.", filename)); + real_path = std::string(tmp_path);) // Open file, only the current user can read and write, to avoid malicious application access // Using the O_EXCL, if the file already exists,return failed to avoid privilege escalation vulnerability. 
diff --git a/src/ge/common/formats/format_transfers/datatype_transfer.cc b/src/ge/common/formats/format_transfers/datatype_transfer.cc index 957c101d..935880eb 100644 --- a/src/ge/common/formats/format_transfers/datatype_transfer.cc +++ b/src/ge/common/formats/format_transfers/datatype_transfer.cc @@ -48,19 +48,19 @@ enum DataTypeTransMode { }; std::map, DataTypeTransMode> trans_mode_map{ - {std::pair(DT_FLOAT, DT_FLOAT16), kTransferWithDatatypeFloatToFloat16}, - {std::pair(DT_FLOAT, DT_INT32), kTransferWithDatatypeFloatToInt32}, - {std::pair(DT_FLOAT16, DT_FLOAT), kTransferWithDatatypeFloat16ToFloat}, - {std::pair(DT_FLOAT16, DT_INT32), kTransferWithDatatypeFloat16ToInt32}, - {std::pair(DT_INT32, DT_FLOAT), kTransferWithDatatypeInt32ToFloat}, - {std::pair(DT_INT32, DT_FLOAT16), kTransferWithDatatypeInt32ToFloat16}, - {std::pair(DT_INT32, DT_UINT8), kTransferWithDatatypeInt32ToUint8}, - {std::pair(DT_INT32, DT_INT8), kTransferWithDatatypeInt32ToInt8}, - {std::pair(DT_UINT8, DT_FLOAT), kTransferWithDatatypeUint8ToFloat}, - {std::pair(DT_UINT8, DT_INT32), kTransferWithDatatypeUint8ToInt32}, - {std::pair(DT_INT8, DT_FLOAT), kTransferWithDatatypeInt8ToFloat}, - {std::pair(DT_INT8, DT_INT32), kTransferWithDatatypeInt8ToInt32}, - {std::pair(DT_INT64, DT_INT32), kTransferWithDatatypeInt64ToInt32}}; + {std::pair(DT_FLOAT, DT_FLOAT16), kTransferWithDatatypeFloatToFloat16}, + {std::pair(DT_FLOAT, DT_INT32), kTransferWithDatatypeFloatToInt32}, + {std::pair(DT_FLOAT16, DT_FLOAT), kTransferWithDatatypeFloat16ToFloat}, + {std::pair(DT_FLOAT16, DT_INT32), kTransferWithDatatypeFloat16ToInt32}, + {std::pair(DT_INT32, DT_FLOAT), kTransferWithDatatypeInt32ToFloat}, + {std::pair(DT_INT32, DT_FLOAT16), kTransferWithDatatypeInt32ToFloat16}, + {std::pair(DT_INT32, DT_UINT8), kTransferWithDatatypeInt32ToUint8}, + {std::pair(DT_INT32, DT_INT8), kTransferWithDatatypeInt32ToInt8}, + {std::pair(DT_UINT8, DT_FLOAT), kTransferWithDatatypeUint8ToFloat}, + {std::pair(DT_UINT8, DT_INT32), kTransferWithDatatypeUint8ToInt32}, + {std::pair(DT_INT8, DT_FLOAT), kTransferWithDatatypeInt8ToFloat}, + {std::pair(DT_INT8, DT_INT32), kTransferWithDatatypeInt8ToInt32}, + {std::pair(DT_INT64, DT_INT32), kTransferWithDatatypeInt64ToInt32}}; template Status TransDataSrc2Dst(const CastArgs &args, uint8_t *dst, const size_t data_size) { diff --git a/src/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc b/src/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc index d92cd134..76834437 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc +++ b/src/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc @@ -45,12 +45,12 @@ bool CheckShape(Format format, const ShapeVector &shape) { } } -/// -/// After the conversion to two-dimensional matrix, the memory arrangement is small z and large N. -/// @src_shape: N*H*W -/// @dst_shape: N*W1*H1*H0*w0 -/// @return -/// +/** + * After the conversion to two-dimensional matrix, the memory arrangement is small z and large N. + * @src_shape: N*H*W + * @dst_shape: N*W1*H1*H0*w0 + * @return + */ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, ShapeVector &dst_shape, ShapeVector &hw_shape) { dst_shape.clear(); @@ -150,8 +150,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con auto dst_offset = (h1h0_head + w1_idx * h1h0w0) * size; auto src_offset = (src_h_head + w1_idx * w0) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? 
dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size * w0)); if (ret != EOK) { @@ -165,8 +165,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con auto dst_offset = (h1h0_head + num_w1 * h1h0w0 + w0_idx) * size; auto src_offset = (src_h_head + src_w_idx) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { @@ -218,8 +218,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con auto src_offset = (h1h0_head + w1_idx * h1h0w0) * size; auto dst_offset = (dst_h_head + w1_idx * w0) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size * w0)); if (ret != EOK) { @@ -233,8 +233,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con auto src_offset = (h1h0_head + num_w1 * h1h0w0 + w0_idx) * size; auto dst_offset = (dst_h_head + dst_w_idx) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { diff --git a/src/ge/common/formats/format_transfers/format_transfer_fractal_z.cc b/src/ge/common/formats/format_transfers/format_transfer_fractal_z.cc index eafb1fa1..aedc7589 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_fractal_z.cc +++ b/src/ge/common/formats/format_transfers/format_transfer_fractal_z.cc @@ -29,13 +29,14 @@ namespace formats { namespace { Status CheckDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_type) > 0 ? SUCCESS : UNSUPPORTED; } -/// -/// FZ represents the weight of convolution,. -/// After the conversion to two-dimensional matrix, the memory arrangement is small n and large Z. -/// If 4D(eg.NCHW) is used to represent convolution kernel, N is width, HWC is height. -/// -/// frac_z axises: (C1*H*W, No, Ni, C0), which Ni = 16, C0 = 16/32, No = Ceil(N/Ni), C1 = Ceil(C/C0) -/// +/** + * FZ represents the weight of convolution,. + * After the conversion to two-dimensional matrix, the memory arrangement is small n and large Z. + * If 4D(eg.NCHW) is used to represent convolution kernel, N is width, HWC is height. 
+ * + * frac_z axises: (C1*H*W, No, Ni, C0), which Ni = 16, C0 = 16/32, No = Ceil(N/Ni), C1 = Ceil(C/C0) + * @return + */ Status TransShapeToFz(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector &dst_shape) { auto c0 = GetCubeSizeByDataType(data_type); if (c0 < 0) { @@ -148,8 +149,8 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) { auto idx = gfi * fractal_ele_cnt + col * c0 + row; auto offset = idx * size; auto protected_size = dst_size - offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - offset + : static_cast(SECUREC_MEM_MAX_LEN); errno_t ret; if (need_pad_zero) { ret = memset_s(dst.get() + offset, static_cast(protected_size), 0, static_cast(size)); @@ -209,8 +210,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) { int64_t dst_idx = c1i * hwn1n0c0 + hi * wn1n0c0 + wi * n1n0c0 + n1n0i * c0 + c0i; int64_t dst_offset = dst_idx * data_size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto pad_zero = ((c1i * c0 + c0i) >= c) || (n1n0i >= n); errno_t ret; if (pad_zero) { @@ -274,8 +275,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) { int64_t dst_idx = c1i * hwn1n0c0 + hi * wn1n0c0 + wi * n1n0c0 + n1n0i * c0 + c0i; int64_t dst_offset = dst_idx * data_size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto pad_zero = ((c1i * c0 + c0i) >= c) || (n1n0i >= n); errno_t ret; if (pad_zero) { diff --git a/src/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc b/src/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc index 4058d349..3453c232 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc +++ b/src/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc @@ -105,8 +105,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in auto src_offset = src_idx * size; auto dst_offset = dst_idx * size; auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { diff --git a/src/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc b/src/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc index 83d33cb2..6f616051 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc +++ b/src/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc @@ -105,8 +105,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in auto src_offset = src_idx * size; auto dst_offset = dst_idx * size; auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? 
total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { diff --git a/src/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc b/src/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc index 9f6f4890..57b840af 100644 --- a/src/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc +++ b/src/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc @@ -105,8 +105,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size auto src_offset = src_idx * size; auto dst_offset = dst_idx * size; auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { diff --git a/src/ge/common/formats/formats.cc b/src/ge/common/formats/formats.cc index fcc02eb0..938f0888 100644 --- a/src/ge/common/formats/formats.cc +++ b/src/ge/common/formats/formats.cc @@ -19,7 +19,6 @@ #include #include #include - #include #include #include diff --git a/src/ge/common/fp16_t.cc b/src/ge/common/fp16_t.cc index 51a15fb0..76dfe348 100644 --- a/src/ge/common/fp16_t.cc +++ b/src/ge/common/fp16_t.cc @@ -198,7 +198,7 @@ fp16_t &fp16_t::operator=(const int32_t &i_val) { } } else { e_ret = FP16_EXP_BIAS; - m_tmp = m_tmp << static_cast(kDim_11 - len); + m_tmp = m_tmp << static_cast(kDim_11 - len); e_ret = e_ret + (len - 1); } uint16_t m_ret = static_cast(m_tmp); diff --git a/src/ge/common/fp16_t.h b/src/ge/common/fp16_t.h index a9bdc073..4e46c041 100644 --- a/src/ge/common/fp16_t.h +++ b/src/ge/common/fp16_t.h @@ -17,11 +17,10 @@ #ifndef GE_COMMON_FP16_T_H_ #define GE_COMMON_FP16_T_H_ +#include #include #include -#include - namespace ge { /** *@ingroup fp16 basic parameter diff --git a/src/ge/common/ge/tbe_plugin_manager.cc b/src/ge/common/ge/tbe_plugin_manager.cc deleted file mode 100644 index a053b687..00000000 --- a/src/ge/common/ge/tbe_plugin_manager.cc +++ /dev/null @@ -1,131 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "common/ge/tbe_plugin_manager.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common/ge/ge_util.h" -#include "framework/common/debug/log.h" -#include "framework/common/debug/ge_log.h" -#include "framework/common/util.h" -#include "framework/common/ge_inner_error_codes.h" -#include "framework/engine/dnnengine.h" -#include "framework/omg/omg_inner_types.h" -#include "external/ge/ge_api_types.h" -#include "register/op_registry.h" -#include "graph/opsproto_manager.h" - -namespace ge { -// Get Singleton Instance -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY TBEPluginManager &TBEPluginManager::Instance() { - static TBEPluginManager instance_ptr_; - return instance_ptr_; -} - -void TBEPluginManager::ClearHandles_() { - for (const auto &handle : handles_vec_) { - if (dlclose(handle) != 0) { - GELOGW("Failed to close handle: %s", dlerror()); - } - } - handles_vec_.clear(); -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::Finalize() { ClearHandles_(); } - -string TBEPluginManager::GetPath() { - Dl_info dl_info; - if (dladdr(reinterpret_cast(&TBEPluginManager::GetPath), &dl_info) == 0) { - GELOGW("Failed to read so path!"); - return string(); - } else { - string so_path = dl_info.dli_fname; - char path[PATH_MAX] = {0}; - if (so_path.length() >= PATH_MAX) { - GELOGW("File path is too long!"); - return string(); - } - if (realpath(so_path.c_str(), path) == nullptr) { - GELOGW("Failed to get realpath of %s", so_path.c_str()); - return string(); - } - - so_path = path; - so_path = so_path.substr(0, so_path.rfind('/') + 1); - return so_path; - } -} - -Status TBEPluginManager::CheckCustomAiCpuOpLib() { - std::vector vec_op_type; - - domi::OpRegistry::Instance()->GetOpTypeByImplyType(vec_op_type, domi::ImplyType::CUSTOM); - for (size_t i = 0; i < vec_op_type.size(); i++) { - bool aicpu_so_exist = false; - std::string ai_cpu_so_name = "lib" + vec_op_type[i] + "_aicpu.so"; - for (size_t j = 0; j < domi::GetContext().aicpu_op_run_paths.size(); j++) { - string bin_file_path = domi::GetContext().aicpu_op_run_paths[j]; - if (bin_file_path.size() >= ai_cpu_so_name.size() && - bin_file_path.compare(bin_file_path.size() - ai_cpu_so_name.size(), ai_cpu_so_name.size(), ai_cpu_so_name) == - 0) { - aicpu_so_exist = true; - break; - } - } - if (!aicpu_so_exist) { - GELOGE(FAILED, "Can't find aicpu run so(%s), please check the plugin path!", ai_cpu_so_name.c_str()); - return FAILED; - } - } - return SUCCESS; -} - -void TBEPluginManager::SaveDdkVersion(const std::string &ddk_version) { - if (ddk_version.empty()) { - return; - } - GELOGI("Input ddk version : %s.", ddk_version.c_str()); - - // Save DDK version number to omgcontext - domi::GetContext().ddk_version = ddk_version; -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::InitPreparation( - const std::map &options) { - Status ret = CheckCustomAiCpuOpLib(); - if (ret != SUCCESS) { - GELOGE(ret, "Check custom aicpu run so failed!"); - return; - } else { - auto ddk_version = options.find("ge.DDK_version"); - if (ddk_version != options.end()) { - SaveDdkVersion(ddk_version->second); - } else { - GELOGW("No ddkVersion!"); - return; - } - } -} -} // namespace ge diff --git a/src/ge/common/ge/tbe_plugin_manager.h b/src/ge/common/ge/tbe_plugin_manager.h deleted file mode 100644 index 9b1e2662..00000000 --- a/src/ge/common/ge/tbe_plugin_manager.h +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * 
Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_ -#define GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "external/ge/ge_api_error_codes.h" -#include "external/register/register.h" - -namespace ge { -using SoHandlesVec = std::vector; -using std::vector; -using std::string; -using std::map; -using std::function; - -class TBEPluginManager { - public: - void Finalize(); - - // Get TBEPluginManager singleton instance - static TBEPluginManager& Instance(); - - static string GetPath(); - - static void InitPreparation(const std::map &options); - - private: - TBEPluginManager() = default; - ~TBEPluginManager() = default; - void ClearHandles_(); - static Status CheckCustomAiCpuOpLib(); - static void SaveDdkVersion(const std::string &ddk_version); - - SoHandlesVec handles_vec_; -}; -} // namespace ge - -#endif // GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_ diff --git a/src/ge/common/helper/model_helper.cc b/src/ge/common/helper/model_helper.cc index e5270b12..4026fab1 100644 --- a/src/ge/common/helper/model_helper.cc +++ b/src/ge/common/helper/model_helper.cc @@ -26,10 +26,10 @@ #include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" -using std::string; -using ge::TBEKernelStore; -using ge::TBEKernelPtr; using domi::ModelTaskDef; +using ge::TBEKernelPtr; +using ge::TBEKernelStore; +using std::string; namespace ge { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelHelper::~ModelHelper() { (void)ReleaseLocalModelData(); } @@ -201,7 +201,7 @@ ModelHelper::SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::strin GELOGE(FAILED, "SaveModel fail for save buffer fail"); return FAILED; } - shared_ptr om_file_save_helper = ge::MakeShared(); + std::shared_ptr om_file_save_helper = ge::MakeShared(); GE_CHECK_NOTNULL_EXEC(om_file_save_helper, return MEMALLOC_FAILED); ModelPartition partition_model; partition_model.data = model_buffer.GetData(); @@ -428,7 +428,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::TransModelT TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); GE_IF_BOOL_EXEC(tbe_kernel == nullptr, continue); kernel_store.AddTBEKernel(tbe_kernel); - GELOGI("Add tbe kernel bin %s", tbe_kernel->GetName().c_str()); } } if (!kernel_store.Build()) { diff --git a/src/ge/common/math_util.h b/src/ge/common/math_util.h index 2ff99bc1..150f4154 100644 --- a/src/ge/common/math_util.h +++ b/src/ge/common/math_util.h @@ -18,9 +18,8 @@ #define GE_COMMON_MATH_UTIL_H_ #include -#include - #include +#include #include "Eigen/Eigen" #include "framework/common/debug/log.h" @@ -32,13 +31,13 @@ namespace ge { /** -* @ingroup domi_calibration -* @brief Initializes an input array to a specified value -* @param [in] n array initialization length -* @param [in] alpha initialization value -* @param [out] output array to be initialized -* @return Status -*/ + * @ingroup domi_calibration + * 
@brief Initializes an input array to a specified value + * @param [in] n array initialization length + * @param [in] alpha initialization value + * @param [out] output array to be initialized + * @return Status + */ template Status NnSet(const int32_t n, const Dtype alpha, Dtype *output) { GE_CHECK_NOTNULL(output); diff --git a/src/ge/common/model_parser/base.h b/src/ge/common/model_parser/base.h index ffc430e8..a78e28f1 100644 --- a/src/ge/common/model_parser/base.h +++ b/src/ge/common/model_parser/base.h @@ -17,6 +17,7 @@ #ifndef GE_COMMON_MODEL_PARSER_BASE_H_ #define GE_COMMON_MODEL_PARSER_BASE_H_ +#include #include #include "framework/common/debug/log.h" @@ -36,7 +37,7 @@ class ModelParserBase { /// @ingroup hiai /// @brief destructor /// - virtual ~ModelParserBase(); + ~ModelParserBase(); /// /// @ingroup hiai diff --git a/src/ge/common/model_saver.cc b/src/ge/common/model_saver.cc old mode 100755 new mode 100644 index c5328578..c3b780f7 --- a/src/ge/common/model_saver.cc +++ b/src/ge/common/model_saver.cc @@ -63,10 +63,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi const char *model_char = model_str.c_str(); uint32_t len = static_cast(model_str.length()); // Write data to file - mmSsize_t mmpa_ret = mmWrite(fd, const_cast((const void *)model_char), len); + int32_t mmpa_ret = mmWrite(fd, const_cast((const void *)model_char), len); if (mmpa_ret == EN_ERROR || mmpa_ret == EN_INVALID_PARAM) { // Need to both print the error info of mmWrite and mmClose, so return ret after mmClose - GELOGE(FAILED, "Write to file failed. errno = %ld", mmpa_ret); + GELOGE(FAILED, "Write to file failed. errno = %d", mmpa_ret); ret = FAILED; } // Close file diff --git a/src/ge/common/op/attr_define.cc b/src/ge/common/op/attr_define.cc new file mode 100644 index 00000000..bd9731ac --- /dev/null +++ b/src/ge/common/op/attr_define.cc @@ -0,0 +1,810 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "framework/common/op/attr_define.h" + +namespace domi { +/** + * Public attribute + */ +const std::string ATTR_NAME_NAME = "name"; + +const std::string ATTR_NAME_TYPE = "type"; + +const std::string ATTR_NAME_WEIGHT_NAME = "weight_name"; + +const std::string ATTR_NAME_IS_QUANTIZE_FACTOR = "quantize_factor"; + +const std::string ATTR_NAME_ALPHA = "alpha"; + +const std::string ATTR_NAME_BETA = "beta"; + +const std::string ATTR_NAME_PADMODE = "pad_mode"; + +const std::string ATTR_NAME_PADMODES = "padding"; + +const std::string ATTR_NAME_MODE = "mode"; + +const std::string ATTR_NAME_FILTER = "filter"; + +const std::string ATTR_NAME_BIAS = "bias"; + +const std::string ATTR_NAME_BIAS_TERM = "bias_term"; + +const std::string ATTR_NAME_HAS_BIAS_VALUE = "has_bias_value"; + +const std::string ATTR_NAME_PAD = "pad"; + +const std::string ATTR_NAME_PADS = "pad"; + +const std::string ATTR_NAME_PAD_SIZE = "pad size"; + +const std::string ATTR_NAME_PAD_MODE = "pad mode"; + +const std::string ATTR_NAME_SCALE = "scale"; + +const std::string ATTR_NAME_WINDOWS = "windows"; + +const std::string ATTR_NAME_GLOBAL_POOLING = "global_pooling"; + +const std::string ATTR_NAME_CEIL_MODE = "ceil_mode"; + +const std::string ATTR_NAME_STRIDE_SIZE = "stride size"; + +const std::string ATTR_NAME_RELU_FLAG = "relu_flag"; + +const std::string ATTR_NAME_ALGO = "algo"; + +const std::string ATTR_NAME_FORMAT = "format"; + +const std::string ATTR_NAME_FILTER_FORMAT = "filter_format"; + +const std::string ATTR_NAME_LRN_K = "lrn_k"; + +const std::string ATTR_NAME_LRN_NORM_REGION = "lrn_normregion"; + +const std::string ATTR_NAME_LRN_LOCAL_SIZE = "lrn_localsize"; + +const std::string ATTR_NAME_LRN_ALPHA = "lrn_alpha"; + +const std::string ATTR_NAME_LRN_BETA = "lrn_beta"; + +const std::string ATTR_NAME_AXIS = "axis"; +const std::string ATTR_NAME_BROADCAST = "broadcast"; + +const std::string ATTR_NAME_OUTPUT = "output"; +const std::string ATTR_NAME_OUTPUT_NUM = "output_num"; +const std::string ATTR_NAME_TIDX = "t_idx"; + +const std::string ATTR_NAME_TPADDINGS = "t_paddings"; +const std::string ATTR_IMG_H = "img_h"; +const std::string ATTR_IMG_W = "img_w"; +const std::string ATTR_NET_H = "net_h"; +const std::string ATTR_NET_W = "net_w"; + +const std::string ATTR_NAME_TMULTIPLES = "t_multiples"; + +const std::string ATTR_NAME_MULTIPLES = "multiples"; + +const std::string ATTR_NAME_T = "T"; +const std::string ATTR_NAME_N = "N"; + +const std::string ATTR_NAME_TSHAPE = "Tshape"; +const std::string ATTR_NAME_NAN_OPT = "nan_opt"; + +const std::string ATTR_NAME_AIPP = "aipp"; +const std::string NEW_AIPP_CONV_OP = "new_conv_op_for_aipp"; + +const std::string ATTR_NAME_SESSION_GRAPH_ID = "session_graph_id"; + +const std::string ATTR_NAME_MULTISHAPE_BATCHLIST = "multi_shape_batchlist"; +const std::string ATTR_NAME_MULTISHAPE_BATCHLIST_SIZE = "multi_shape_batchlist_size"; +const std::string ATTR_MODEL_BATCH_NUM = "batch_num"; + +const std::string ATTR_NAME_INPUT_FORMAT = "input_format"; +const std::string ATTR_NAME_OUTPUT_FORMAT = "output_format"; + +const std::string ATTR_NAME_FRAMEWORK_NODE_DEF = "node_def"; +const std::string ATTR_NAME_FRAMEWORK_OP_DEF = "op_def"; +const std::string ATTR_NAME_FRAMEWORK_FWK_TYPE = "framework_type"; +const std::string ATTR_NAME_FRAMEWORK_FUNC_DEF = "func_def"; +const std::string ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE = "original_type"; + +const std::string ATTR_NAME_INPUT_TENSOR_DESC = "input_tensor_desc"; +const std::string ATTR_NAME_OUTPUT_TENSOR_DESC = "output_tensor_desc"; + +const std::string 
ATTR_NAME_INFERRED_FORMAT = "inferred_format"; +const std::string ATTR_NAME_PRED_PERMUTE_DELETED = "pred_permute_deleted"; +const std::string ATTR_NAME_IGNORE_PRED_FORMAT = "ignore_pred_format"; +const std::string ATTR_NAME_WEIGHTS = "value"; +const std::string ATTR_NAME_BROACAST_REAL_DIM_CNT = "broacast_real_dim_cnt"; +const std::string ATTR_NAME_DIM_ALIGN = "dim_align"; + +const std::string ATTR_NAME_STREAM_LABEL = "_stream_label"; +const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG = "need_stream_cycle_event"; +const std::string ATTR_NAME_RTSWITCH_RECV_EVENT_ID = "rtswitch_event_id"; + +const std::string ATTR_NAME_AUTOMIC_ADD_START = "automic_add_addr_start"; +const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size"; + +/* to be deleted*/ +const std::string ATTR_TO_BE_DELETED = "to_be_deleted"; +const std::string PERMUTE_RESHAPE_FUSION = "permute_reshape_fusion"; +const std::string PERMUTE_RESHAPE_FUSION_CONV_PROPOSAL = "fusion_conv_proposal"; +const std::string PERMUTE_RESHAPE_FUSION_CONV_DECODEBBOX = "fusion_conv_decodebbox"; +const std::string PERMUTE_RESHAPE_FUSION_BOX_TYPE_NUM = "box_type_num"; +const std::string SSD_MBOX_LOC_FUSION = "permute_flatten_fusion"; +const std::string SSD_MBOX_CONF_FUSION = "permute_flatten_reshape_flatten_fusion"; +const std::string SSD_MBOX_OCR_FUSION = "permute_flatten_ocr_fusion"; +const std::string SSD_MBOX_FUSION_BOX_TYPE_NUM = "ssd_mbox_fusion_box_type_num"; +const std::string SSD_RESHAPE_SLICE_CONCAT_FUSION = "reshape_slice_concat_fusion"; + +/* refinedet */ +const std::string REFINEDET_MBOX_LOC_FUSION = "permute_flatten_fusion"; +const std::string REFINEDET_MBOX_CONF_FUSION = "permute_flatten_reshape_flatten_fusion"; +const std::string REFINEDET_MBOX_FUSION_BOX_TYPE_NUM = "ssd_mbox_fusion_box_type_num"; +const std::string REFINEDET_RESHAPE_SLICE_CONCAT_FUSION = "reshape_slice_concat_fusion"; + +const std::string SSD_PRIORBOX_CONCAT = "ssd_mbox_conf_priorbox_concat_flag"; + +/* _Arg */ +const std::string ATTR_NAME_INDEX = "index"; +/* _RetVal */ +const std::string RETVAL_ATTR_NAME_INDEX = "retval_index"; +/*Data*/ +const std::string DATA_ATTR_NAME_DATA_TYPE = "data_type"; + +/*Send*/ +const std::string SEND_ATTR_EVENT_ID = "event_id"; + +/*Recv*/ +const std::string RECV_ATTR_EVENT_ID = "event_id"; + +/** + * convolution + */ +const std::string ATTR_NAME_COEF = "coef"; + +const std::string ATTR_NAME_STRIDE = "stride"; + +const std::string ATTR_NAME_STRIDES = "stride"; + +const std::string ATTR_NAME_DILATION = "dilation"; + +const std::string ATTR_NAME_DILATIONS = "dilation"; + +const std::string CONV_ATTR_NAME_MODE = "mode"; + +const std::string CONV_ATTR_NAME_ALGO = "algo"; + +const std::string CONV_ATTR_NAME_GROUP = "group"; + +const std::string CONV_ATTR_NAME_PAD_MODE = "pad_mode"; + +const std::string CONV_ATTR_NAME_PAD = "pad"; + +const std::string CONV_ATTR_NAME_STRIDE = "stride"; + +const std::string CONV_ATTR_NAME_DILATION = "dilation"; + +const std::string CONV_ATTR_NAME_NUM_OUTPUT = "num_output"; + +const std::string CONV_ATTR_NAME_KERNEL = "kernel"; + +const std::string CONV_ATTR_NAME_FILTER = "filter"; + +const std::string CONV_ATTR_NAME_BIAS = "bias"; + +const std::string CONV_ATTR_NAME_RELU_FLAG = "relu_flag"; + +const std::string CONV_ATTR_NAME_ADJ = "adj"; + +const std::string CONV_ATTR_NAME_TARGET_SHAPE = "target_shape"; + +const std::string CONV_ATTR_NAME_BEFORE_PAD = "before_pad"; + +const std::string CONV_ATTR_NAME_HAS_BIAS = "has_bias"; + +/*Pooling*/ +const std::string POOLING_ATTR_MODE = "mode"; +const 
std::string POOLING_ATTR_NAN_OPT = "nan_opt"; +const std::string POOLING_ATTR_PAD_MODE = "pad_mode"; +const std::string POOLING_ATTR_GLOBAL_POOLING = "global_pooling"; +const std::string POOLING_ATTR_WINDOW = "window"; +const std::string POOLING_ATTR_PAD = "pad"; +const std::string POOLING_ATTR_STRIDE = "stride"; +const std::string POOLING_ATTR_CEIL_MODE = "ceil_mode"; +const std::string POOLING_ATTR_DATA_MODE = "data_mode"; +const std::string POOLING_ATTR_BEFORE_PAD = "before_pad"; +const std::string POOLING_ATTR_NAME_ALGO = "algo"; + +/*Eltwise*/ +const std::string ELTWISE_ATTR_MODE = "mode"; +const std::string ELTWISE_ATTR_COEFF = "coeff"; +const std::string ELTWISE_ATTR_WEIGHT = "weight"; +const std::string ELTWISE_ATTR_RELU_FLAG = "relu_flag"; +const std::string ELTWISE_ATTR_ALPHA = "alpha"; +const std::string ELTWISE_ATTR_BETA = "beta"; + +/*BatchNorm*/ +const std::string BATCHNORM_ATTR_MODE = "mode"; +const std::string BATCHNORM_ATTR_EPSILON = "epsilon"; +const std::string BATCHNORM_ATTR_USE_GLOBAL_STATS = "use_global_stats"; +const std::string BATCHNORM_ATTR_MOVING_AVERAGE_FRACTION = "moving_average_fraction"; +const std::string BATCHNORM_ATTR_ESTIMATED_MEAN = "estimated_mean"; +const std::string BATCHNORM_ATTR_ESTIMATED_VARIANCE = "estimated_variance"; +const std::string BATCHNORM_ATTR_SCALE = "scale"; +const std::string BATCHNORM_ATTR_BIAS = "bias"; +const std::string BATCHNORM_ATTR_DATA_FORMAT = "data_format"; +const std::string BATCHNORM_ATTR_IS_TRAINING = "is_training"; +const std::string BATCHNORM_ATTR_IS_TRAINING_FUSION = "is_training_fusion"; + +/*huberloss*/ +const std::string HUBER_LOSS_ATTR_DELTA = "delta"; + +/*SSDRealDivTileMul*/ +const std::string SSD_REAL_DIV_TILE_MUL_ATTR_TILE_PARA = "tilepara"; + +/*SSDSumMulRealDivMean*/ +const std::string SSD_SUM_MUL_REALDIV_MEAN_ATTR_REDUCTION_INDICES = "reduction_indices"; +const std::string SSD_SUM_MUL_REALDIV_MEAN_ATTR_AXIS = "axis"; +const std::string SSD_SUM_MUL_REALDIV_MEAN_ATTR_MEAN_PARA = "mean_para"; +const std::string SSD_SUM_MUL_REALDIV_MEAN_ATTR_HAS_SUM = "has_sum"; + +/*ConcatFive2Four*/ +/*ConcatFour2Five*/ +const std::string SSD_BOX_TYPE_NUM = "box_type_num"; +const std::string SSD_CLASS_NUM = "class_num"; +const std::string TRANS_FOR_LOSS_MODE = "trans_for_loss_mode"; +const std::string SSD_FEATURE_MAP_SIZE = "feature_map_size"; +const std::string SSD_FEATURE_MAP_HIGH = "feature_map_high"; +const std::string SSD_FEATURE_MAP_WIDTH = "feature_map_width"; + +/*Scale*/ +const std::string SCALE_ATTR_SCALE = "scale"; +const std::string SCALE_ATTR_BIAS = "bias"; + +/*FullConnection*/ +const std::string FULL_CONNECTION_ATTR_FILTER = "filter"; +const std::string FULL_CONNECTION_ATTR_BIAS = "bias"; +const std::string FULL_CONNECTION_ATTR_NUM_OUTPUT = "num_output"; +const std::string FULL_CONNECTION_ATTR_RELU_FLAG = "relu_flag"; +const std::string FULL_ATTR_NAME_ALGO = "algo"; + +/*SoftmaxOpParams*/ +const std::string SOFTMAX_ATTR_ALGO = "algo"; +const std::string SOFTMAX_ATTR_MODE = "mode"; + +/*SparseSoftmaxCrossEntropy*/ +const std::string SPARSE_SOFTMAX_CROSS_ENTROPY_ATTR_MODE = "cross_entropy_mode"; +const std::string SPARSE_SOFTMAX_CROSS_ENTROPY_IS_GRAD = "cross_entropy_is_grad"; +const std::string SOFTMAX_CROSS_ENTROPY_LABELSMOOTHING = "labelSmoothing"; + +/*Activation*/ +const std::string ACTIVATION_ATTR_MODE = "mode"; +const std::string ACTIVATION_ATTR_COEF = "coef"; + +/* Concat */ +const std::string CONCAT_ATTR_NAME_AXIS = "axis"; + +/* Const */ +const std::string CONST_ATTR_NAME_DATA_TRANSTYPE = 
"data_transtype"; +const std::string CONST_ATTR_NAME_OUTPUT_FORMAT = "output_format"; +const std::string CONST_ATTR_NAME_OUTPUT_TYPE = "output_type"; + +/* roipooling */ +const std::string ROIPOOLING_ATTR_NAME_POOLED_H = "pooled_h"; +const std::string ROIPOOLING_ATTR_NAME_POOLED_W = "pooled_w"; +const std::string ROIPOOLING_ATTR_NAME_SPATIAL_SCALE = "spatial_scale"; +const std::string ROIPOOLING_ATTR_NAME_RIO_POOLING_MODE = "rio_pooling_mode"; +const std::string ROIPOOLING_ATTR_NAME_POOLING_MODE = "pooling_mode"; +const std::string ROIPOOLING_ATTR_NAME_SAMPLING_RATIO = "sampling_ratio"; + +/* DetectionOutput */ +const std::string DETECTIONOUTPUT_ATTR_NUM_CLASSES = "num_classes"; +const std::string DETECTIONOUTPUT_ATTR_OCR_NUM_CLASSES = "ocr_num_classes"; +const std::string DETECTIONOUTPUT_ATTR_NMS_THRESHOLD = "nms_threshold"; +const std::string DETECTIONOUTPUT_ATTR_TOP_K = "top_k"; +const std::string DETECTIONOUTPUT_ATTR_CONFIDENCE_THRESHOLD = "confidence_threshold"; +const std::string DETECTIONOUTPUT_ATTR_IMG_H = "img_h"; +const std::string DETECTIONOUTPUT_ATTR_IMG_W = "img_w"; +const std::string DETECTIONOUTPUT_ATTR_BATCH_SIZE = "batch_size"; + +/* Ssd DetectionOutput */ +const std::string DETECTIONOUTPUT_ATTR_ETA = "eta"; +const std::string DETECTIONOUTPUT_ATTR_SHARED_LOCATION = "shared_location"; +const std::string DETECTIONOUTPUT_ATTR_BACKGROUND_LABEL_ID = "background_label_id"; +const std::string DETECTIONOUTPUT_ATTR_CODE_TYPE = "code_type"; +const std::string DETECTIONOUTPUT_ATTR_VARIANCE_ENCODED_IN_TARGET = "variance_encoded_in_target"; +const std::string DETECTIONOUTPUT_ATTR_KEEP_TOP_K = "keep_top_k"; + +/* Refinedet DetectionOutput */ +const std::string DETECTIONOUTPUT_ATTR_OBJECTNESS_SCORE = "objectness_score"; + +/* yolo DetectionOutput */ +const std::string DETECTIONOUTPUT_ATTR_ClASSES = "classes"; +const std::string DETECTIONOUTPUT_ATTR_BIASES = "biases"; +const std::string DETECTIONOUTPUT_ATTR_RELATIVE = "relative"; +const std::string DETECTIONOUTPUT_ATTR_OBJECTNESS_THRESHOLD = "objectness_threshold"; +const std::string DETECTIONOUTPUT_ATTR_CLASS_THRESHOLD = "class_threshold"; +const std::string DETECTIONOUTPUT_ATTR_POST_TOP_K = "post_top_k"; +const std::string DETECTIONOUTPUT_ATTR_IOU_THRESHOLD_DECAY = "iou_threshold_decay"; +const std::string DETECTIONOUTPUT_ATTR_COOR_SCALE_FACTOR = "coor_scale_factor"; +const std::string DETECTIONOUTPUT_ATTR_YOLO_VERSION = "yolo_version"; + +/* DetectionPostprocess */ +const std::string POSTPROCESS_ATTR_NAME_CLS_NUM = "cls_num"; +const std::string POSTPROCESS_ATTR_NAME_CONF_THRESH = "conf_thresh"; +const std::string POSTPROCESS_ATTR_NAME_NMS_THRESH = "nms_thresh"; +const std::string POSTPROCESS_ATTR_POST_NMS_TOPN = "post_nms_topn"; +const std::string POSTPROCESS_ATTR_NAME_BBOX_REG_WEIGHT = "bbox_reg_weights"; + +/* Spatialtransfrom */ +const std::string SPTIALTF_ATTR_NAME_OUTPUT_H = "output_h"; +const std::string SPTIALTF_ATTR_NAME_OUTPUT_W = "output_w"; +const std::string SPTIALTF_ATTR_NAME_BORDER_VALUE = "border_value"; +const std::string SPTIALTF_ATTR_NAME_AFFINE_TRANSFORM = "affine_transform"; + +/* Proposal */ +const std::string PROPOSAL_ATTR_NAME_FEAT_STRIDE = "feat_stride"; +const std::string PROPOSAL_ATTR_NAME_BASE_SIZE = "base_size"; +const std::string PROPOSAL_ATTR_NAME_MIN_SIZE = "min_size"; +const std::string PROPOSAL_ATTR_NAME_RATIO = "ratio"; +const std::string PROPOSAL_ATTR_NAME_SCALE = "scale"; +const std::string PROPOSAL_ATTR_NAME_PRE_NMS_TOPN = "pre_nms_topn"; +const std::string PROPOSAL_ATTR_NAME_POST_NMS_TOPN = 
"post_nms_topn"; +const std::string PROPOSAL_ATTR_NAME_NMS_THRESH = "nms_thresh"; +const std::string PROPOSAL_ATTR_NAME_TOP_SIZE = "top_size"; +const std::string PROPOSAL_ATTR_IMG_H = "img_h"; +const std::string PROPOSAL_ATTR_IMG_W = "img_w"; + +/* Softmax */ +const std::string SOFTMAX_ATTR_AXIS = "axis"; + +/* Permute */ +const std::string PERMUTE_ATTR_ORDER = "order"; +const std::string PERMUTE_ATTR_PERM = "perm"; + +/*SSD Normalize*/ +const std::string SSDNORMALIZE_ATTR_ACCROSS_SPATIAL = "across_spatial"; +const std::string SSDNORMALIZE_ATTR_CHANNEL_SHARED = "channel_shared"; +const std::string SSDNORMALIZE_ATTR_EPS = "eps"; + +/* Flatten */ +const std::string FLATTEN_ATTR_AXIS = "axis"; +const std::string FLATTEN_ATTR_END_AXIS = "end_axis"; + +/* SsdPRIORBOX */ +const std::string SSD_PRIOR_BOX_ATTR_FLIP = "flip"; +const std::string SSD_PRIOR_BOX_ATTR_CLIP = "clip"; +const std::string SSD_PRIOR_BOX_ATTR_IMG_H = "img_h"; +const std::string SSD_PRIOR_BOX_ATTR_IMG_W = "img_w"; +const std::string SSD_PRIOR_BOX_ATTR_STEP_H = "step_h"; +const std::string SSD_PRIOR_BOX_ATTR_STEP_W = "step_w"; +const std::string SSD_PRIOR_BOX_ATTR_OFFSET = "offset"; +const std::string SSD_PRIOR_BOX_ATTR_MIN_SIZE = "min_size"; +const std::string SSD_PRIOR_BOX_ATTR_MAX_SIZE = "max_size"; +const std::string SSD_PRIOR_BOX_ATTR_MIN_SIZE_NUM = "min_size_num"; +const std::string SSD_PRIOR_BOX_ATTR_MAX_SIZE_NUM = "max_size_num"; +const std::string SSD_PRIOR_BOX_ATTR_ASPECT_RATIO = "aspect_ratio"; +const std::string SSD_PRIOR_BOX_ATTR_ASPECT_RATIO_NUM = "aspect_ratio_num"; +const std::string SSD_PRIOR_BOX_ATTR_VARIANCE = "variance"; +const std::string SSD_PRIOR_BOX_ATTR_VARIANCE_NUM = "variance_num"; + +/* RefinedetDetectionOutput */ +const std::string REFINEDET_PRIOR_BOX_ATTR_VARIANCE_NUM = "variance_num"; +const std::string REFINEDET_PRIOR_BOX_ATTR_VARIANCE = "variance"; + +/* PRelu */ +const std::string PRELU_ATTR_CHANNEL_SHARED = "channel_shared"; + +/*psroi pooling*/ +const std::string PSROIPOOLING_ATTR_SPATIAL_SCALE = "spatial_scale"; +const std::string PSROIPOOLING_ATTR_OUTPUT_DIM = "output_dim"; +const std::string PSROIPOOLING_ATTR_GROUP_SIZE = "group_size"; + +/* power */ +const std::string POWER_ATTR_NAME_POWER = "power"; +const std::string POWER_ATTR_NAME_SCALE = "scale"; +const std::string POWER_ATTR_NAME_SHIFT = "shift"; + +/* log */ +const std::string LOG_ATTR_NAME_SCALE = "scale"; +const std::string LOG_ATTR_NAME_SHIFT = "shift"; +const std::string LOG_ATTR_NAME_BASE = "base"; + +/*pack*/ +const std::string PACK_ATTR_NAME_NUM = "N"; + +/*unpack*/ +const std::string UNPACK_ATTR_NAME_NUM = "num"; + +const std::string DYNAMIC_STITCH_ATTR_NAME_NUM = "DynamicStitchN_"; + +/*gathernd*/ +const std::string GATHERND_ATTR_NAME_TINDICES = "Tindices"; +const std::string GATHERND_ATTR_NAME_TPARAMS = "Tparams"; + +/*argmax*/ +const std::string ARGMAX_ATTR_NAME_TOPK = "topk"; +const std::string ARGMAX_ATTR_NAME_REDUCESIZE = "reduce_size"; +const std::string ARGMAX_ATTR_NAME_REDUCESTRIDE = "reduce_stride"; +const std::string ARGMAX_ATTR_NAME_OUTMAX = "outmaxval"; +const std::string ARGMAX_ATTR_NAME_AXIS = "axis"; +const std::string ARGMAX_ATTR_NAME_AXISTYPE = "axis_type"; +const std::string ARGMAX_ATTR_NAME_KEEPDIMS = "keep_dims"; + +/*upsample*/ +const std::string UPSAMPLE_ATTR_NAME_SCALE_H = "scale_h"; +const std::string UPSAMPLE_ATTR_NAME_SCALE_W = "scale_w"; + +/* relu */ +const std::string ATTR_NAME_NEGATIVE_SLOPE = "negative_slope"; + +/* FreeSpaceExtract */ +const std::string FREESPACEEXTRACT_ATTR_NAME_ORG_HEIGHT 
= "org_height"; + +/* split */ +const std::string SPLIT_ATTR_NAME_SLICE_POINT = "slice_point"; +const std::string SPLIT_ATTR_NAME_SIZE_SPLIT = "size_split"; +const std::string SPLIT_ATTR_NAME_NUM_SPLIT = "num_split"; + +/* Tvm */ +const std::string TVM_ATTR_NAME_MAGIC = "tvm_magic"; +const std::string TVM_ATTR_NAME_BLOCKDIM = "tvm_blockdim"; +const std::string TVM_ATTR_NAME_METADATA = "tvm_metadata"; + +/*squeeze*/ +const std::string SQUEEZE_ATTR_AXIS = "axis"; +const std::string SQUEEZE_ATTR_DIMS = "squeeze_dims"; +const std::string SQUEEZE_OP_NAME = "Squeeze"; + +/*stride slice*/ +const std::string STRIDE_SLICE_ATTR_BEGIN_MASK = "begin_mask"; +const std::string STRIDE_SLICE_ATTR_END_MASK = "end_mask"; +const std::string STRIDE_SLICE_ATTR_ELLIPSIS_MASK = "ellipsis_mask"; +const std::string STRIDE_SLICE_ATTR_NEW_AXIS_MASK = "new_axis_mask"; +const std::string STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK = "shrink_axis_mask"; + +/*slice*/ +const std::string SLICE_ATTR_NAME_BEGINS = "begins"; +const std::string SLICE_ATTR_NAME_SIZES = "sizes"; + +/*roialign*/ +const std::string ROIALIGN_ATTR_SPATIAL_SCALE = "spatial_scale"; +const std::string ROIALIGN_ATTR_SAMPLING_RATIO = "sampling_ratio"; +const std::string ROIALIGN_ATTR_NAME_POOLED_H = "pooled_h"; +const std::string ROIALIGN_ATTR_NAME_POOLED_W = "pooled_w"; + +/*generate_rpn_proposal*/ +const std::string GENERATE_RPN_PROPOSAL_ATTR_PRE_NMS_TOPK = "pre_nms_topk"; +const std::string GENERATE_RPN_PROPOSAL_ATTR_POST_NMS_TOPK = "post_nms_topk"; +const std::string GENERATE_RPN_PROPOSAL_ATTR_RPN_MINI_SIZE = "rpn_mini_size"; +const std::string GENERATE_RPN_PROPOSAL_ATTR_RPN_PROPOSAL_NMS_THRESH = "rpn_proposal_nms_thresh"; +const std::string GENERATE_RPN_PROPOSAL_ATTR_RPN_PROPOSAL_FILTER_THRESH = "rpn_proposal_filter_thresh"; + +/*decode_bbox*/ +const std::string DECODE_BBOX_ATTR_DECODECLIP = "decodeClip"; + +/* Cast */ +const std::string CAST_ATTR_DSTT = "DstT"; +const std::string CAST_ATTR_SRCT = "SrcT"; + +/* fastrcnnn predications*/ +const std::string FASTRCNN_PREDICTIONS_ATTR_TOPK = "fsr_topk"; +const std::string FASTRCNN_PREDICTIONS_ATTR_SCORE_THRESHOLD = "fsr_score_thres"; +const std::string FASTRCNN_PREDICTIONS_ATTR_NMS_THRESHOLD = "fsr_nms_thres"; +const std::string FASTRCNN_PREDICTIONS_ATTR_NUM_CLASSES = "fsr_num_classes"; + +/* REORG*/ +const std::string REORG_ATTR_STRIDE = "stride"; +const std::string REORG_ATTR_REVERSE = "reverse"; + +/* MERGE*/ +const std::string MERGE_DEAD_INDEX = "merge_dead_index"; +const std::string MERGE_PRENODE_FLAG = "merge_prenode_flag"; +const std::string TO_BE_OUTPUT = "to_be_output"; + +/*Concatv2*/ +const std::string CONCAT_V2_ATTR_TIDX = "Tidx"; +const std::string CONCAT_V2_ATTR_N = "N"; + +/* SUM*/ +const std::string SUM_ATTR_TIDX = "Tidx"; +const std::string SUM_ATTR_AXIS = "axis"; +const std::string SUM_ATTR_KEEP_DIMS = "keep_dims"; + +/*ResizeBilinear*/ +const std::string RESIZE_BILINEAR_ATTR_MODE = "mode"; +const std::string RESIZE_BILINEAR_ATTR_ALIGN_CORNERS = "align_corners"; +const std::string RESIZE_BILINEAR_ATTR_HEIGHT = "height"; +const std::string RESIZE_BILINEAR_ATTR_WIDTH = "width"; +const std::string RESIZE_BILINEAR_ATTR_ZOOM_FACTOR = "zoom_factor"; +const std::string RESIZE_BILINEAR_ATTR_SHRINK_FACTOR = "shrink_factor"; +const std::string RESIZE_BILINEAR_ATTR_PAD_BEGIN = "pad_begin"; +const std::string RESIZE_BILINEAR_ATTR_PAD_END = "pad_end"; +const std::string RESIZE_BILINEAR_ATTR_ALPHA = "alpha"; +const std::string RESIZE_BILINEAR_ATTR_BETA = "beta"; + +/*RetinaNet*/ +const std::string 
RETINANET_FILTER_BACKGROUND_TRUE = "retina_conv_filter_background"; +const std::string RETINANET_ANCHOR_FUSION = "retina_anchor_fusion"; + +/*MatMul*/ +const std::string MATMUL_TRANSPOSE_X = "transposeX"; +const std::string MATMUL_TRANSPOSE_W = "transposeW"; +const std::string MATMUL_HAS_BIAS = "has_bias"; +const std::string MATMUL_ATTR_IS_TRAINING = "matmul_is_training"; + +/*Flatten*/ +const std::string FLATTEN_START_AXIS = "start_axis"; +const std::string FLATTEN_END_AXIS = "end_axis"; + +/*reshape*/ +const std::string RESHAPE_ATTR_AXIS = "axis"; +const std::string RESHAPE_ATTR_NUM_AXES = "num_axes"; +const std::string RESHAPE_ATTR_FORMAT = "format"; +const std::string RESHAPE_ATTR_SHAPE = "shape"; +const std::string RESHAPE_ATTR_ALPHA = "alpha"; +const std::string RESHAPE_ATTR_BETA = "beta"; + +/*frameoworkop*/ +const std::string T_IN_DATATYPE = "t_in_datatype"; +const std::string T_OUT_DATATYPE = "t_out_datatype"; +const std::string ATTR_NAME_OUT_N = "out_n"; +const std::string ATTR_NAME_OUT_C = "out_c"; +const std::string ATTR_NAME_OUT_H = "out_h"; +const std::string ATTR_NAME_OUT_W = "out_w"; +const std::string ATTR_PAD_DEPTH_CONV = "pad_depth_conv"; +const std::string ATTR_PAD_CONV = "pad_conv"; + +const std::string ATTR_NAME_BEFORE_PAD = "before_pad"; +const std::string ANN_MEAN_KEEPDIMS = "AnnMeanKeepDims"; +const std::string PAD_ATTR_PADDINGDS = "paddings"; +const std::string PAD_ATTR_CONSTANT_VALUE = "padvalue"; + +/*ConvGradFilter*/ +const std::string CONV_GRAD_FILTER_OUTPUT_SHAPE = "conv_grad_filter_output_shape"; + +/*ConvGradInput*/ +const std::string CONV_GRAD_INPUT_OUTPUT_SHAPE = "conv_grad_input_output_shape"; + +/*Rnn*/ +const std::string RNN_MODE_STATIC = "rnn_static"; +const std::string MUTI_RNN = "multi_rnn"; +const std::string CNN_RNN = "cnn_rnn"; +const std::string CELL_MODE = "mode"; +const std::string LSTM_CELL = "lstm_cell"; +const std::string GRU_CELL = "gru_cell"; +const std::string RNN_HT = "ht"; +const std::string RNN_XT_HT = "xt_ht"; +const std::string RNN_BATCH_SIZE = "batch_size"; +const std::string LSTM_CELL_CLIP = "lstm_cell_clip"; +const std::string LSTM_PROJ_CLIP = "lstm_proj_clip"; +const std::string LSTM_ACTIVATE = "lstm_activate"; +const std::string LSTM_OUT_MAP = "lstm_out_map"; +const std::string LSTM_OUT_MODE = "lstm_out_mode"; +const std::string LSTM_STATE_OUT_MODE = "lstm_state_out_mode"; +const std::string LSTM_TIME_MAJOR = "lstm_time_major"; +const std::string LSTM_IS_INPUT_PRE_PROCESS = "lstm_is_input_pre_process"; + +/*Upsample*/ +const std::string UPSAMPLE_ATTR_NAME_SCALE = "scale"; + +/*PadV2*/ +const std::string PADV2_ATTR_NAME_MODE = "mode"; +const std::string PADV2_ATTR_NAME_PADS = "paddings"; +const std::string PADV2_ATTR_NAME_T = "T"; +const std::string PADV2_ATTR_NAME_PAD_FORMAT = "pad_format"; +const std::string PADV2_ATTR_NAME_CONST_VALUE = "const_value"; + +/*MirrorPad*/ +const std::string MIRRORPAD_ATTR_NAME_MODE = "mode"; +const std::string MIRRORPAD_ATTR_NAME_PADS = "paddings"; +const std::string MIRRORPAD_ATTR_NAME_PAD_FORMAT = "pad_format"; +const std::string MIRRORPAD_ATTR_NAME_CONST_VALUE = "const_value"; + +/* filler */ +const std::string FILLER_TYPE = "filler_type"; +const std::string FILLER_VALUE = "filler_value"; + +/*shufflechannel*/ +const std::string SHUFFLE_CHANNEL_GROUP = "group"; + +/*TopKV2*/ +const std::string TOPKV2_ATTR_K = "k"; + +/*Calibaration*/ +const std::string STRIDE_H_INDEX = "STRIDE_H_INDEX"; +const std::string STRIDE_W_INDEX = "STRIDE_W_INDEX"; +const std::string PAD_TOP_INDEX = "PAD_TOP_INDEX"; 
+const std::string PAD_BOTTOM_INDEX = "PAD_BOTTOM_INDEX";
+const std::string PAD_RIGHT_INDEX = "PAD_RIGHT_INDEX";
+const std::string PAD_LEFT_INDEX = "PAD_LEFT_INDEX";
+
+const std::string ATTR_NAME_IS_CONST = "attr_name_is_const";
+
+const std::string ATTR_NAME_GROUP = "group";
+const std::string ATTR_NAME_DILATION_SIZE = "dilation_size";
+const std::string ATTR_NAME_EPSILON = "epsilon";
+const std::string ATTR_NAME_POOLING_MODE = "mode";
+const std::string ATTR_NAME_CLASS_NUM = "class_num";
+/**
+ * model
+ */
+const std::string ATTR_MODEL_TARGET_TYPE = "target_type";
+
+const std::string ATTR_MODEL_STREAM_NUM = "stream_num";
+
+const std::string ATTR_MODEL_EVENT_NUM = "event_num";
+
+const std::string ATTR_MODEL_MEMORY_SIZE = "memory_size";
+
+const std::string ATTR_MODEL_WEIGHT_SIZE = "weight_size";
+
+const std::string ATTR_MODEL_TASK_GEN_BASE_ADDR = "task_gen_base_addr";
+
+const std::string ATTR_MODEL_TASK_GEN_WEIGHT_ADDR = "task_gen_weight_addr";
+
+/**
+ * Public attribute
+ */
+const std::string ATTR_NAME_IMPLY_TYPE = "imply_type";
+
+const std::string ATTR_NAME_BYTE_SIZE = "op_byte_size";
+
+const std::string ATTR_NAME_FUSION_INFERENCE_ID = "fusion_inference_id";
+
+const std::string ATTR_NAME_FUSION_OPDEF = "fusion_opdef";
+
+const std::string ATTR_NAME_FUSION_SCOPE = "fusion_scope";
+
+const std::string ATTR_NAME_OPATTR = "opattr";
+
+const std::string ATTR_NAME_RELUFLAG = "relu_flag";
+
+const std::string ATTR_NAME_SEQLEN_INDEX = "seqlen_index";
+
+const std::string ATTR_NAME_X_INDEX = "x_index";
+
+const std::string ATTR_NAME_CONT_INDEX = "cont_index";
+
+const std::string ATTR_NAME_XSTATIC_INDEX = "xstatic_index";
+
+const std::string TARGET_TYPE_MINI = "MINI";
+
+const std::string TARGET_TYPE_TINY = "TINY";
+
+const std::string TARGET_TYPE_LITE = "LITE";
+
+/* l2_normalize */
+const std::string L2_NORMALIZE_ATTR_AXIS = "axis";
+const std::string L2_NORMALIZE_ATTR_EPS = "eps";
+
+const std::string POOL_PARAMA_ATTR_WINDOW = "window";
+const std::string POOL_PARAMA_ATTR_CEIL_MODE = "ceil_mode";
+const std::string POOL_PARAMA_ATTR_DATA_MODE = "data_mode";
+const std::string POOL_PARAMA_ATTR_GLOBAL_POOLING = "global_pooling";
+const std::string POOL_PARAMA_ATTR_NAN_OP = "nan_opt";
+const std::string POOL_PARAMA_ATTR_PAD_MOD = "pad_mode";
+
+/* HCOM */
+const std::string HCOM_ATTR_ROOT_RANK = "root_rank";
+const std::string HCOM_ATTR_RANK_SIZE = "rank_size";
+
+const std::string HCOM_ATTR_REDUCE_TYPE = "reduction";
+const std::string HCOM_ATTR_GROUP = "group";
+const std::string HCOM_ATTR_SR_TAG = "sr_tag";
+const std::string HCOM_ATTR_SRC_RANK = "src_rank";
+const std::string HCOM_ATTR_DEST_RANK = "dest_rank";
+const std::string HCOM_ATTR_FUSION = "fusion";
+const std::string HCOM_ATTR_SHAPE = "shape";
+const std::string HCOM_ATTR_DATA_TYPE = "dtype";
+
+/* SpaceToDepth/DepthToSpace */
+const std::string ATTR_NAME_BLOCK_SIZE = "block_size";
+
+/* SparseSoftmaxCrossEntropyWithLogits */
+const std::string SPARSE_SOFT_MAX_ATTR_TLABLES = "Tlabels";
+
+/* MaxPoolGradWithArgmax */
+const std::string MAX_POOL_GRAD_OUTPUT_SHAPE = "max_pool_grad_output_shape";
+
+/* AvgPoolGrad */
+const std::string AVG_POOL_GRAD_OUTPUT_SHAPE = "avg_pool_grad_output_shape";
+
+/* Pad */
+const std::string ATTR_PAD_FORMAT = "attr_pad_format";
+
+/* Variable */
+const std::string VAR_ATTR_FORMAT = "_var_format";
+const std::string VAR_ATTR_NAME = "var_name";
+const std::string VAR_ATTR_FRACTALZ_FORMAT = "FZ";
+const std::string VAR_ATTR_4D_FORMAT = "4D";
+const std::string VAR_ATTR_5D_FORMAT = "5D";
+const std::string VAR_ATTR_DATA_TYPE = "data_format";
+const std::string VAR_ATTR_VAR_IN_NAME = "var_in_name";
+const std::string VAR_ATTR_VAR_IN_INDEX = "var_in_index";
+const std::string VAR_ATTR_VAR_OUT_INDEX = "var_out_index";
+const std::string VAR_ATTR_SHAPE = "shape";
+const std::string HALF_VAR_NAME_END = "_fp16";
+const std::string VAR_ATTR_INITED = "var_is_inited";
+
+const std::string VAR_ATTR_CONTAINER = "container";
+const std::string VAR_ATTR_SHARED_NAME = "shared_name";
+const std::string VAR_ATTR_DTYPE = "dtype";
+
+const std::string VAR_ATTR_SRC_VAR_NAME = "_src_var_name";
+const std::string VAR_ATTR_VAR_IS_SAVE = "_var_is_save";
+const std::string VAR_ATTR_VAR_IS_RESTORE = "_var_is_restore";
+const std::string VAR_ATTR_VAR_IS_BROADCAST = "_var_is_broadcast";
+const std::string REF_VAR_SRC_VAR_NAME = "ref_var_src_var_name";
+const std::string REF_VAR_PRE_PEER_OUT_INDEX = "ref_var_pre_peer_out_index";
+
+/* Assign */
+const std::string ASSIGN_VALIDATE_SHAPE = "validate_shape";
+
+/* space2batch batch2space */
+const std::string BATCH_SPACE_ATTR_BLOCK = "block";
+const std::string BATCH_SPACE_ATTR_PADDING = "padding";
+
+/* depth_to_space space_to_depth */
+const std::string DEPTH_SPACE_ATTR_BLOCK_SIZE = "block_size";
+
+/* FakeQuantWithMinMaxVars */
+const std::string FakeQuantWithMinMaxVars_ATTR_MAX = "max";
+const std::string FakeQuantWithMinMaxVars_ATTR_MIN = "min";
+
+/* mobilenet_ssd_conv_fusion */
+const std::string SSD_BOXPREDICTOR_BOXES_FUSION = "ssd_boxpredictor_boxes_fusion";
+const std::string SSD_BOXPREDICTOR_SCORES_FUSION = "ssd_boxpredictor_scores_fusion";
+const std::string SSD_BOXPREDICTOR_FUSION_BOX_TYPE_NUM = "ssd_boxpredictor_fusion_box_type_num";
+
+/* lsh project */
+const std::string LSH_PROJ_TYPE = "lsh_project_type";
+
+/* log time stamp */
+const std::string LOG_TIME_STAMP_LOGID = "logid";
+const std::string LOG_TIME_STAMP_NOTIFY = "notify";
+
+/* ShapeN */
+const std::string SHAPEN_ATTR_N = "N";
+const std::string SHAPEN_ATTR_IN_TYPE = "in_type";
+const std::string SHAPEN_ATTR_OUT_TYPE = "out_type";
+
+/* control flow */
+const std::string ATTR_NAME_ITERATORS_PER_LOOP = "iterations_per_loop";
+const std::string ATTR_NAME_TRUE_BRANCH_STREAM = "true_branch_stream";
+const std::string ATTR_NAME_FLOW_CTRL_NODE_FLAG = "is_flow_ctrl_node";
+
+/* GatherV2 attr def */
+const std::string GATHERV2_ATTR_NAME_TAXIS = "Taxis";
+const std::string GATHERV2_ATTR_NAME_TINDICES = "Tindices";
+const std::string GATHERV2_ATTR_NAME_TPARAMS = "Tparams";
+
+/* Reshape attr def */
+const std::string RESHAPE_ATTR_NAME_INPUT_DESC = "input_desc_reshape";
+const std::string RESHAPE_ATTR_NAME_OUTPUT_DESC = "output_desc_reshape";
+
+/* axis attr def */
+const std::string ATTR_NAME_AXIS_ORG_OP = "axis_org_op";
+
+const std::string ATTR_NAME_LINK_WITH_SPARE = "link_with_sparse";
+
+const std::string ATTR_NAME_NET_OUTPUT_FORMAT = "net_output_format";
+const std::string ATTR_NAME_NET_OUTPUT_DATATYPE = "net_output_datatype";
+
+} // namespace domi
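[Editor's note] The sketch below is an illustrative aside, not part of the patch. The string constants added above are attribute keys in namespace domi (declared in inc/framework/common/op/attr_define.h per the diffstat); graph passes typically read and write them on an op description through ge::AttrUtils. The helper name TagConvGroup and the exact include paths are assumptions made for illustration:

    #include "framework/common/op/attr_define.h"
    #include "graph/op_desc.h"
    #include "graph/utils/attr_utils.h"

    // Hypothetical helper: attach a convolution attribute under one of the
    // keys defined above, then read it back.
    void TagConvGroup(const ge::OpDescPtr &op_desc) {
      // SetInt stores the value on the op under the "group" key.
      (void)ge::AttrUtils::SetInt(op_desc, domi::CONV_ATTR_NAME_GROUP, 1);

      // GetInt returns false when the attribute is absent, so callers can
      // distinguish "unset" from a stored default.
      int64_t group = 0;
      if (ge::AttrUtils::GetInt(op_desc, domi::CONV_ATTR_NAME_GROUP, group)) {
        // use group ...
      }
    }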
diff --git a/src/ge/common/op/ge_op_utils.cc b/src/ge/common/op/ge_op_utils.cc
index cce9b91d..2a8d81ea 100644
--- a/src/ge/common/op/ge_op_utils.cc
+++ b/src/ge/common/op/ge_op_utils.cc
@@ -18,8 +18,6 @@
 #include
-#include "cce/dnn.h"
-#include "cce/dnn_struct.hpp"
 #include "common/ge/ge_util.h"
 #include "external/graph/types.h"
 #include "framework/common/debug/ge_log.h"
@@ -74,352 +72,6 @@ const uint32_t SWITCH_FALSE_OUTPUT = 0;
 const uint32_t SWITCH_TRUE_OUTPUT = 1;
 const uint32_t SWITCH_DATA_INPUT = 0;
 const uint32_t SWITCH_PRED_INPUT = 1;
-// Internal constant
-const uint32_t kPoolMaskDescWinH = 4;
-const uint32_t kPoolMaskDescWinW = 5;
-const uint32_t kPoolMaskDescDimSize = 6;
-
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool OpUtils::IsComputDimsSize(const int32_t format,
-                                                                               const uint32_t real_dim_cnt) {
-  return ((format == cce::CC_TENSOR_ND) ||
-          ((format != cce::CC_TENSOR_NC1KHKWHWC0) && (format != cce::CC_TENSOR_C1HWNCoC0) &&
-           (real_dim_cnt > DIM_DEFAULT_SIZE)));
-}
-
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status
-OpUtils::InitTensorDescriptor(const GeTensorDesc &tensor, cce::ccTensorDescriptor_t &cc_tensor) {
-  return InitTensorDescriptor(tensor, static_cast<int32_t>(tensor.GetDataType()), cc_tensor);
-}
-
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OpUtils::InitTensorDescriptor(
-    const GeTensorDesc &model_tensor, int32_t dst_data_type, cce::ccTensorDescriptor_t &cc_tensor) {
-  uint32_t real_dim_cnt = OpUtils::GetRealDimCnt(model_tensor);
-  return InitTensorDescriptor(static_cast<int32_t>(model_tensor.GetFormat()), dst_data_type,
-                              model_tensor.GetShape().GetDims(), cc_tensor, real_dim_cnt);
-}
-
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status
-OpUtils::InitTensorDescriptor(const GeTensor &model_tensor, cce::ccTensorDescriptor_t &cc_tensor) {
-  return InitTensorDescriptor(model_tensor, static_cast<int32_t>(model_tensor.GetTensorDesc().GetDataType()),
-                              cc_tensor);
-}
-
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OpUtils::InitTensorDescriptor(
-    const GeTensor &model_tensor, int32_t dst_data_type, cce::ccTensorDescriptor_t &cc_tensor) {
-  const GeTensorDesc &tensor_desc = model_tensor.GetTensorDesc();
-  const GeShape &shape = tensor_desc.GetShape();
-  return InitTensorDescriptor(static_cast<int32_t>(tensor_desc.GetFormat()), dst_data_type, shape.GetDims(), cc_tensor,
-                              static_cast<uint32_t>(shape.GetDimNum()));
-}
-
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status
-OpUtils::InitTensorDescriptor(int32_t format, int32_t data_type, const std::vector<int64_t> &dim,
-                              cce::ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt) {
-  Status ret = SUCCESS;
-  ccDataType_t data_type_ = cce::tagCcDataType(data_type);
-  real_dim_cnt =
-      static_cast<uint32_t>(((real_dim_cnt == 0) && (dim.size() > DIM_DEFAULT_SIZE)) ? dim.size() : real_dim_cnt);
-  if (IsComputDimsSize(format, real_dim_cnt)) {
-    GE_CHK_CCE_RET(cce::ccCreateTensorDescriptor(&cc_tensor));
-#if (defined(__GNUC__) && !(defined(__ICC) || defined(__INTEL_COMPILER))) && \
-    (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 50000
-    // Variable length array initialization is not supported in gcc4.X compilation environment
-    GE_CHK_BOOL_RET_STATUS(real_dim_cnt <= CC_DIM_MAX, domi::CCE_FAILED, "real_dim_cnt support <= 8.");
-    int32_t real_dim[CC_DIM_MAX] = {0};
-#else
-    int32_t real_dim[real_dim_cnt] = {};
-#endif
-    uint32_t i = 0;
-    for (auto dim_temp : dim) {
-      GE_CHK_BOOL_EXEC_NOLOG(i < real_dim_cnt && i < kDimMaxSize, break);
-      real_dim[i] = static_cast<int32_t>(dim_temp);
-      i++;
-    }
-
-    auto cc_ret = cce::ccSetTensorNdDescriptor(cc_tensor, data_type_, real_dim_cnt, real_dim);
-    GE_IF_BOOL_EXEC(cc_ret != cce::CC_STATUS_SUCCESS,
-                    GELOGE(domi::CCE_FAILED, "Call cce failed.
cc_ret = %d", cc_ret); - GE_CHK_CCE(cce::ccDestroyTensorDescriptor(&cc_tensor)); return domi::CCE_FAILED); - - return ret; - } else if (format == cce::CC_TENSOR_NC1KHKWHWC0) { - GE_CHK_CCE_RET(cce::ccCreatePoolingMaskDescriptor(&cc_tensor)); - cce::ccTensorFormat_t format_new = cce::tagCcTensorFormat(format); - GE_IF_BOOL_EXEC( - dim.size() != kPoolMaskDescDimSize, - GELOGE(PARAM_INVALID, "format CC_TENSOR_NC1KHKWHWC0 dim size must be 6,dim size id %lu.", dim.size()); - GE_CHK_CCE(cce::ccDestroyTensorDescriptor(&cc_tensor)); return PARAM_INVALID); - auto cc_ret = ccSetPoolingMaskTensorDescriptor( - cc_tensor, format_new, data_type_, static_cast(dim[NCHW_DIM_N]), - static_cast(dim[NCHW_DIM_C]), static_cast(dim[NCHW_DIM_H]), - static_cast(dim[NCHW_DIM_W]), static_cast(dim[kPoolMaskDescWinH]), - static_cast(dim[kPoolMaskDescWinW])); - - GE_IF_BOOL_EXEC(cc_ret != cce::CC_STATUS_SUCCESS, - GELOGE(domi::CCE_FAILED, "Call cce failed. cc_ret = %d", cc_ret); - GE_CHK_CCE(cce::ccDestroyTensorDescriptor(&cc_tensor)); return domi::CCE_FAILED); - return ret; - } else if (format == cce::CC_TENSOR_C1HWNCoC0) { - GE_CHK_CCE_RET(cce::ccCreateTensorDescriptor(&cc_tensor)); - cce::ccTensorFormat_t format_new = cce::tagCcTensorFormat(format); - GE_IF_BOOL_EXEC( - dim.size() != DIM_C1HWNCoC0_SIZE, - GELOGE(PARAM_INVALID, "format C1HWNCoC0_DIM_SIZE dim size must be 5,dim size id %lu.", dim.size()); - GE_CHK_CCE(cce::ccDestroyTensorDescriptor(&cc_tensor)); return PARAM_INVALID); - - auto cc_ret = cce::ccSetFilter6dDescriptor( - cc_tensor, format_new, data_type_, static_cast(dim[C1HWNCoC0_DIM_C1]), - static_cast(dim[C1HWNCoC0_DIM_H]), static_cast(dim[C1HWNCoC0_DIM_W]), - static_cast(dim[C1HWNCoC0_DIM_N]), static_cast(dim[C1HWNCoC0_DIM_Co]), - static_cast(dim[C1HWNCoC0_DIM_C0])); - - GE_IF_BOOL_EXEC(cc_ret != cce::CC_STATUS_SUCCESS, GELOGE(CCE_FAILED, "Call cce failed. cc_ret = %d", cc_ret); - GE_CHK_CCE(cce::ccDestroyTensorDescriptor(&cc_tensor)); return CCE_FAILED); - - return ret; - } - std::vector dim_vector; - (void)TransferDim(dim, dim_vector); // TransferDim always return success, no need to check value - // format - if (!CheckEnumValid(format, cce::CC_TENSOR_NCHW, cce::CC_TENSOR_RESERVED)) { - GELOGE(PARAM_INVALID, "not supported format, format = %d", format); - return PARAM_INVALID; - } - cce::ccTensorFormat_t format_new = cce::tagCcTensorFormat(format); - - // data type - if (!CheckEnumValid(data_type, cce::CC_DATA_FLOAT, cce::CC_DATA_RESERVED)) { - GELOGE(PARAM_INVALID, "not supported data type, type = %d", data_type); - return PARAM_INVALID; - } - - // create tensor descriptor - GE_CHK_CCE_RET(cce::ccCreateTensorDescriptor(&cc_tensor)); - - // input shape - size_t input_shape_size = dim_vector.size(); - GE_IF_BOOL_EXEC(input_shape_size != DIM_DEFAULT_SIZE, GELOGI("input_shape_size is %zu", input_shape_size)); - - // The last two outputs of fusedbatchnormgrad are 0. Need special processing for fusedbatchnormgrad. 
- GE_IF_BOOL_EXEC(dim.size() == 1 && dim[0] == 0, - GE_IF_BOOL_EXEC(cce::ccSetTensorRealDimCnt(cc_tensor, real_dim_cnt) != cce::CC_STATUS_SUCCESS, - GELOGE(domi::CCE_FAILED, "Call cce failed."); - GE_CHK_CCE(cce::ccDestroyTensorDescriptor(&cc_tensor)); return domi::CCE_FAILED); - return ret); - - if (format == cce::CC_TENSOR_NHWC) { - auto cc_ret = cce::ccSetTensor4dDescriptor( - cc_tensor, format_new, data_type_, static_cast(dim_vector.at(NHWC_DIM_N)), - static_cast(dim_vector.at(NHWC_DIM_C)), static_cast(dim_vector.at(NHWC_DIM_H)), - static_cast(dim_vector.at(NHWC_DIM_W))); - - GE_IF_BOOL_EXEC(cc_ret != cce::CC_STATUS_SUCCESS, - GELOGE(domi::CCE_FAILED, "Call cce failed. cc_ret = %d", cc_ret); - ret = domi::CCE_FAILED); - } else if (format == cce::CC_TENSOR_HWCN) { - auto cc_ret = cce::ccSetTensor4dDescriptor( - cc_tensor, format_new, data_type_, static_cast(dim_vector.at(NHWC_DIM_C)), - static_cast(dim_vector.at(NHWC_DIM_W)), static_cast(dim_vector.at(NHWC_DIM_N)), - static_cast(dim_vector.at(NHWC_DIM_H))); - - GE_IF_BOOL_EXEC(cc_ret != cce::CC_STATUS_SUCCESS, - GELOGE(domi::CCE_FAILED, "Call cce failed. cc_ret = %d", cc_ret); - ret = domi::CCE_FAILED); - } else if (format >= cce::CC_TENSOR_HASHTABLE_LOOKUP_LOOKUPS && format <= cce::CC_TENSOR_HASHTABLE_LOOKUP_HITS) { - int32_t dims[dim.size()]; - for (size_t i = 0; i < dim.size(); i++) { - dims[i] = static_cast(dim[i]); - } - - auto cc_ret = cce::ccSetTensorNdDescriptor(cc_tensor, data_type_, static_cast(dim.size()), dims); - cce::ccSetTensorFormat(cc_tensor, format_new); - GE_IF_BOOL_EXEC(cc_ret != cce::CC_STATUS_SUCCESS, GELOGE(CCE_FAILED, "Call cce failed. cc_ret = %d", cc_ret); - ret = CCE_FAILED); - } else { - auto cc_ret = cce::ccSetTensor4dDescriptor( - cc_tensor, format_new, data_type_, static_cast(dim_vector.at(NHWC_DIM_N)), - static_cast(dim_vector.at(NHWC_DIM_H)), static_cast(dim_vector.at(NHWC_DIM_W)), - static_cast(dim_vector.at(NHWC_DIM_C))); - - GE_IF_BOOL_EXEC(cc_ret != cce::CC_STATUS_SUCCESS, - GELOGE(domi::CCE_FAILED, "Call cce failed. cc_ret = %d", cc_ret); - ret = domi::CCE_FAILED); - } - auto cc_ret = cce::ccSetTensorRealDimCnt(cc_tensor, real_dim_cnt); - GE_IF_BOOL_EXEC(cc_ret != cce::CC_STATUS_SUCCESS, GELOGE(domi::CCE_FAILED, "Call cce failed. 
cc_ret = %d", cc_ret); - ret = domi::CCE_FAILED); - - if (ret != SUCCESS) { - GE_CHK_CCE(cce::ccDestroyTensorDescriptor(&cc_tensor)); - cc_tensor = nullptr; - } - - return ret; -} - -// Initialize filter description -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status -OpUtils::InitFilterDescriptor(const GeTensor &model_filter, cce::ccFilterDescriptor_t &cc_filter) { - const GeTensorDesc &tensor_desc = model_filter.GetTensorDesc(); - const GeShape &shape = tensor_desc.GetShape(); - const std::vector dims = shape.GetDims(); - - // format - RETURN_IF_TRUE(!CheckEnumValid(tensor_desc.GetFormat(), cce::CC_TENSOR_NCHW, cce::CC_TENSOR_RESERVED), PARAM_INVALID, - "not supported format, format = %d", tensor_desc.GetFormat()); - - uint32_t tmp_int = static_cast(tensor_desc.GetFormat()); - cce::ccTensorFormat_t format = cce::tagCcTensorFormat(tmp_int); - - // data type - RETURN_IF_TRUE(!CheckEnumValid(tensor_desc.GetDataType(), cce::CC_DATA_FLOAT, cce::CC_DATA_RESERVED), PARAM_INVALID, - "not supported data type, type = %s", - TypeUtils::DataTypeToSerialString(tensor_desc.GetDataType()).c_str()); - - uint32_t dt_tmp = static_cast(tensor_desc.GetDataType()); - - ccDataType_t dataType = cce::tagCcDataType(dt_tmp); - - // create filter descriptor - GE_CHK_CCE_RET(cce::ccCreateFilterDescriptor(&cc_filter)); - - Status ret = SUCCESS; - // input filter - size_t filter_shape_size = shape.GetDimNum(); - if (filter_shape_size == DIM_DEFAULT_SIZE) { - cce::ccStatus_t cc_ret = cce::CC_STATUS_SUCCESS; - - GE_IF_BOOL_EXEC(dims.size() < 4, GELOGE(domi::CCE_FAILED, "dims is invalid!"); return domi::CCE_FAILED); - - if (dataType == CC_DATA_INT8) { - cc_ret = ccSetInt8Filter4dDescriptor(cc_filter, format, dataType, static_cast(dims[KCHW_DIM_K]), - static_cast(dims[KCHW_DIM_C]), - static_cast(dims[KCHW_DIM_H]), - static_cast(dims[KCHW_DIM_W]), cce::CC_DATA_HALF); - } else if (format == cce::CC_TENSOR_FRACTAL_Z_C04 || format == cce::CC_TENSOR_FRACTAL_DECONV_SP_STRIDE_TRANS || - format == cce::CC_TENSOR_FRACTAL_Z || format == cce::CC_TENSOR_FRACTAL_DECONV) { - cc_ret = cce::ccSetFilterFractalDescriptor( - cc_filter, format, dataType, static_cast(dims[KCHW_DIM_K]), - static_cast(dims[KCHW_DIM_C]), static_cast(dims[KCHW_DIM_H]), - static_cast(dims[KCHW_DIM_W])); - } else if (format == cce::CC_TENSOR_NHWC) { - cc_ret = cce::ccSetFilter4dDescriptor(cc_filter, format, dataType, static_cast(dims[NHWC_DIM_N]), - static_cast(dims[NHWC_DIM_C]), - static_cast(dims[NHWC_DIM_H]), - static_cast(dims[NHWC_DIM_W])); - } else if (format == cce::CC_TENSOR_CHWN) { - cc_ret = cce::ccSetFilter4dDescriptor(cc_filter, format, dataType, static_cast(dims[CHWN_DIM_N]), - static_cast(dims[CHWN_DIM_C]), - static_cast(dims[CHWN_DIM_H]), - static_cast(dims[CHWN_DIM_W])); - } else if (format == cce::CC_TENSOR_HWCN) { - cc_ret = cce::ccSetFilter4dDescriptor(cc_filter, format, dataType, static_cast(dims[NHWC_DIM_C]), - static_cast(dims[NHWC_DIM_W]), - static_cast(dims[NHWC_DIM_N]), - static_cast(dims[NHWC_DIM_H])); - } else { - cc_ret = cce::ccSetFilter4dDescriptor(cc_filter, format, dataType, static_cast(dims[KCHW_DIM_K]), - static_cast(dims[KCHW_DIM_C]), - static_cast(dims[KCHW_DIM_H]), - static_cast(dims[KCHW_DIM_W])); - } - - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(domi::CCE_FAILED, "ccSetFilterDescriptor failed. cc_ret = %d, format1 = %d", cc_ret, format); - ret = domi::CCE_FAILED; - } - } else { - GELOGE(UNSUPPORTED, "not supported shape size. 
size = %d", filter_shape_size); - ret = UNSUPPORTED; - } - - if (ret != SUCCESS) { - GE_CHK_CCE(cce::ccDestroyFilterDescriptor(&cc_filter)); - cc_filter = nullptr; - } - - return ret; -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool OpUtils::ConvertDim(cce::ccTensorFormat_t src_format, - const std::vector &src, - cce::ccTensorFormat_t dst_format, - std::vector &dst) { - // The input of 3-dimension and 4-dimension is considered as picture dimension, - // which needs to be converted according to specific format - if ((src.size() != DIM_DEFAULT_SIZE && src.size() != 3) || src_format == dst_format) { - GELOGI("Convert format , src size %zu <3 ,not need convert", src.size()); - dst = src; - return true; - } - - std::vector nchw_dim; - - switch (src_format) { - case cce::CC_TENSOR_NCHW: - nchw_dim = src; - break; - case cce::CC_TENSOR_NHWC: - if (src.size() == DIM_DEFAULT_SIZE) { - nchw_dim.push_back(src[NHWC_DIM_N]); - nchw_dim.push_back(src[NHWC_DIM_C]); - nchw_dim.push_back(src[NHWC_DIM_H]); - nchw_dim.push_back(src[NHWC_DIM_W]); - } else { - nchw_dim.push_back(src[HWC_DIM_C]); - nchw_dim.push_back(src[HWC_DIM_H]); - nchw_dim.push_back(src[HWC_DIM_W]); - } - break; - default: - GELOGW("Not support src format is %d", src_format); - return false; - } - - if (nchw_dim.empty()) { - GELOGW("Vector is empty!"); - return false; - } - - switch (dst_format) { - case cce::CC_TENSOR_NCHW: - dst = nchw_dim; - break; - case cce::CC_TENSOR_NHWC: - if (src.size() == DIM_DEFAULT_SIZE) { - dst.push_back(nchw_dim[NCHW_DIM_N]); - dst.push_back(nchw_dim[NCHW_DIM_H]); - dst.push_back(nchw_dim[NCHW_DIM_W]); - dst.push_back(nchw_dim[NCHW_DIM_C]); - } else { - dst.push_back(nchw_dim[CHW_DIM_H]); - dst.push_back(nchw_dim[CHW_DIM_W]); - dst.push_back(nchw_dim[CHW_DIM_C]); - } - break; - default: - GELOGW("Not support dst format of %d", dst_format); - return false; - } - - return true; -} -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OpUtils::DestroyTensorDescriptor( - cce::ccTensorDescriptor_t &cc_tensor) noexcept { - if (cc_tensor != nullptr) { - cce::ccStatus_t ret = cce::ccDestroyTensorDescriptor(&cc_tensor); - GE_LOGE_IF(ret != cce::CC_STATUS_SUCCESS, "cce::ccDestroyTensorDescriptor failed. ret = %d", ret); - cc_tensor = nullptr; - } -} -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OpUtils::DestroyFilterDescriptor( - cce::ccFilterDescriptor_t &cc_filter) { - if (cc_filter != nullptr) { - cce::ccStatus_t ret = ccDestroyFilterDescriptor(&cc_filter); - GE_LOGE_IF(ret != cce::CC_STATUS_SUCCESS, "ccDestroyFilterDescriptor failed. 
ret = %d", ret); - cc_filter = nullptr; - } -} // Get the value of key from attr #define AIPP_GET_ATTR_VALUE(KEY, ATTR_TYPE) \ @@ -522,68 +174,6 @@ OpUtils::ConvertAippParams(const GeAttrValue::NamedAttrs &aipp_attr, domi::AippO return SUCCESS; } -CceTensorDescriptor::CceTensorDescriptor(cce::ccTensorDescriptor_t cc_tensor) : cc_tensor_(cc_tensor) {} - -CceTensorDescriptor::~CceTensorDescriptor() { - if (cc_tensor_ != nullptr) { - OpUtils::DestroyTensorDescriptor(cc_tensor_); - cc_tensor_ = nullptr; - } -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status CceTensorDescriptor::InitTensor(int32_t format, - int32_t data_type, - const vector &dims) { - if (cc_tensor_ != nullptr) { - GELOGE(PARAM_INVALID, "Cannot init cce tensor descriptor twice!"); - return PARAM_INVALID; - } - cce::ccTensorDescriptor_t cc_tensor = nullptr; - - Status ret = OpUtils::InitTensorDescriptor(format, data_type, dims, cc_tensor); - - GE_CHK_STATUS_EXEC(ret, OpUtils::DestroyTensorDescriptor(cc_tensor); return FAILED, "init cc_tensor failed."); - - cc_tensor_ = cc_tensor; - return SUCCESS; -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status CceTensorDescriptor::InitTensor(int32_t format, - int32_t data_type, - const ge::GeShape &shape) { - return InitTensor(format, data_type, shape.GetDims()); -} - -Status CceTensorDescriptor::GetFormat(cce::ccTensorFormat_t *format) { - GE_CHECK_NOTNULL(format); - GE_CHK_CCE_RET(cce::ccGetTensorFormat(cc_tensor_, format)); - return SUCCESS; -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status CceTensorDescriptor::GetTensorSizeInBytes(uint32_t *size) { - GE_CHECK_NOTNULL(size); - GE_CHK_CCE_RET(cce::ccGetTensorSizeInBytes(cc_tensor_, size)); - return SUCCESS; -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status -CceTensorDescriptor::TransTensor(const cce::ccTensorDescriptor_t x_desc, const void *x, - const CceTensorDescriptorPtr &y_desc, void *y, uint32_t y_size_in_bytes) { - GE_CHECK_NOTNULL(y_desc); - GE_CHK_CCE_RET(cce::ccTransTensor(x_desc, x, y_desc->cc_tensor_, y, y_size_in_bytes)); - return SUCCESS; -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY CceTensorDescriptorPtr CceTensorDescriptor::Create() { - shared_ptr desc = nullptr; - desc = ge::MakeShared(nullptr); - if (desc == nullptr) { - GELOGE(FAILED, "Make CceTensorDescriptor failed."); - return nullptr; - } - return desc; -} - FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OpUtils::TransferDim(const std::vector &dim, std::vector &dim_vector) { size_t input_shape_size = dim.size(); @@ -643,8 +233,8 @@ void OpUtils::SliceData(std::vector &input, int64_t chunk_size, std::vec } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OpUtils::SetOutputSliceData( - void *data, int64_t data_size, int32_t data_type, std::vector &input_dims, std::vector &begin, - std::vector &output_dims, GeTensor *output, std::vector &stride) { + void *data, int64_t data_size, int32_t data_type, std::vector &input_dims, std::vector &begin, + std::vector &output_dims, GeTensor *output, std::vector &stride) { GE_CHECK_NOTNULL(data); GE_CHECK_NOTNULL(output); std::vector chunk_input; @@ -727,7 +317,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OpUtils::TransDataHWCK2KCH const char *w_data = (const char *)input; int64_t count = h * w * c * k; - GE_IF_BOOL_EXEC(count <= 0, GELOGW("Count value must be greater than 0, but count = %ld", count); return); + GE_IF_BOOL_EXEC(count <= 0, GELOGW("Count value must be greater than 0, but count = %ld", count); return ); float *buf = new (std::nothrow) 
float[count](); GE_RT_VOID_CHECK_NOTNULL(buf); float *src_buff = nullptr; @@ -778,58 +368,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OpUtils::TransDataKCHW2HWC } } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status -OpUtils::InitFilterTensorDescriptor(const GeTensorDesc &model_tensor, cce::ccFilterDescriptor_t &cc_tensor) { - auto dims = model_tensor.GetShape().GetDims(); - auto dim_size = dims.size(); - if (dim_size == 0) { - GELOGE(FAILED, "Invalid dim size"); - return FAILED; - } - uint32_t cc_format_tmp = static_cast(model_tensor.GetFormat()); - cce::ccTensorFormat_t format = cce::tagCcTensorFormat(cc_format_tmp); - uint32_t model_tensor_dt = static_cast(model_tensor.GetDataType()); - ccDataType_t data_type = cce::tagCcDataType(model_tensor_dt); - GE_CHK_BOOL_EXEC( - ((format == cce::CC_TENSOR_NCHW) || (format == cce::CC_TENSOR_FRACTAL_Z) || (format == cce::CC_TENSOR_HWCN)), - return PARAM_INVALID, "Filter tensor format:%d not correct.", format); - GE_IF_BOOL_EXEC(static_cast(dims.size()) <= NCHW_DIM_W, - GELOGE(PARAM_INVALID, "Array index is invalid!"); - return PARAM_INVALID); - // create tensor descriptor - GE_CHK_CCE_RET(cce::ccCreateFilterDescriptor(&cc_tensor)); - if (format == cce::CC_TENSOR_FRACTAL_Z) { - GE_CHK_CCE_RET(cce::ccSetFilterFractalDescriptor( - cc_tensor, format, data_type, static_cast(dims[NCHW_DIM_N]), - static_cast(dims[NCHW_DIM_C]), static_cast(dims[NCHW_DIM_H]), - static_cast(dims[NCHW_DIM_W]))); - } else if (format == cce::CC_TENSOR_HWCN) { - GE_CHK_CCE_RET(cce::ccSetFilter4dDescriptor( - cc_tensor, format, data_type, static_cast(dims[NCHW_DIM_W]), - static_cast(dims[NCHW_DIM_H]), static_cast(dims[NCHW_DIM_N]), - static_cast(dims[NCHW_DIM_C]))); - } else { - GE_CHK_CCE_RET(cce::ccSetFilter4dDescriptor( - cc_tensor, format, data_type, static_cast(dims[NCHW_DIM_N]), - static_cast(dims[NCHW_DIM_C]), static_cast(dims[NCHW_DIM_H]), - static_cast(dims[NCHW_DIM_W]))); - } - return SUCCESS; -} - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void OpUtils::SetTensorDescriptorAllOffsetQuantizeInfo( - const GeTensorDesc &tensor, cce::ccTensorDescriptor_t cc_tensor) { - GE_IF_BOOL_EXEC(!TensorUtils::HasAlloffsetQuantizeInfo(tensor), return;); - ccVecQuantizePara_t temp; - AllOffsetQuantizeInfo temp_quantInfo; - GE_CHK_BOOL_EXEC(TensorUtils::GetAlloffsetQuantizeInfo(tensor, temp_quantInfo) == GRAPH_SUCCESS, return, - "Execute GetAlloffsetQuantizeInfo failed."); - temp.scale = temp_quantInfo.scale; - temp.offset = static_cast(temp_quantInfo.offset); - temp.rrv = 0; - cce::ccSetTensorDescriptorQuantizeParam(cc_tensor, &temp); -} - vector OpUtils::GetWeights(const ge::Node &node) { return OpDescUtils::GetWeights(node); } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY vector OpUtils::GetWeights(ge::ConstNodePtr node) { @@ -871,12 +409,14 @@ OpUtils::GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, DataType ty if (type == DT_INT32) { int32_t *shape_data = const_cast(reinterpret_cast(tensor->GetData().GetData())); + GE_CHECK_NOTNULL(shape_data); size_t dims_num = tensor->GetData().size() / sizeof(int32_t); for (size_t i = 0; i < dims_num; i++) { dims.push_back(static_cast(shape_data[i])); } } else if (type == DT_INT64) { int64_t *shape_data = const_cast(reinterpret_cast(tensor->GetData().GetData())); + GE_CHECK_NOTNULL(shape_data); size_t dims_num = tensor->GetData().size() / sizeof(int64_t); for (size_t i = 0; i < dims_num; i++) { dims.push_back(shape_data[i]); diff --git a/src/ge/common/profiling/profiling_manager.cc 
b/src/ge/common/profiling/profiling_manager.cc
index 4c38e22c..bbe105b8 100644
--- a/src/ge/common/profiling/profiling_manager.cc
+++ b/src/ge/common/profiling/profiling_manager.cc
@@ -16,11 +16,11 @@
 #include "common/profiling/profiling_manager.h"
-#include "nlohmann/json.hpp"
-
+#include <nlohmann/json.hpp>
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/debug/log.h"
 #include "framework/common/string_util.h"
+#include "graph/ge_context.h"
 #include "runtime/base.h"

 using Json = nlohmann::json;
@@ -34,6 +34,7 @@ const char *const kConf = "conf";
 const char *const kEvents = "events";
 const char *const kAiCoreEvents = "ai_core_events";
 const char *const kName = "name";
+const char *const kTraceID = "traceId";
 } // namespace

 namespace ge {
@@ -49,7 +50,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager &ProfilingMana
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options) {
 #ifdef DAVINCI_SUPPORT_PROFILING
   device_id_ = options.device_id;
-  job_id_ = std::to_string(options.job_id);
+  job_id_ = options.job_id;

   Status ret;
   if (!recv_profiling_config_.empty()) {
@@ -84,7 +85,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
 }

 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::InitFromAclCfg(
-    const std::string &config) {
+  const std::string &config) {
 #ifdef DAVINCI_SUPPORT_PROFILING
   try {
     is_profiling_ = false;
@@ -185,6 +186,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St
   // profiling need physical_device_id
   p_device[kDeviceID] = std::to_string(device_id_);
   p_device[kJobID] = job_id_;
+  p_device[kTraceID] = std::to_string(GetContext().TraceId());

   Json features;
   if (is_op_trace_) {
@@ -269,7 +271,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf
 }

 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData(
-    const std::map<uint32_t, std::string> &op_task_id_map) {
+  const std::map<uint32_t, std::string> &op_task_id_map) {
 #ifdef DAVINCI_SUPPORT_PROFILING
   Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter();
   if (reporter == nullptr) {
@@ -299,13 +301,13 @@ }

 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::SetProfilingConfig(
-    const std::string &profiling_cfg) {
+  const std::string &profiling_cfg) {
   recv_profiling_config_ = profiling_cfg;
 }

 /**
  * @brief Profiling PluginImpl
-*/
+ */
 // PluginImpl static variable init
 Msprof::Engine::Reporter *PluginImpl::reporter_ = nullptr;
diff --git a/src/ge/common/properties_manager.cc b/src/ge/common/properties_manager.cc
index 16952883..7ec56473 100644
--- a/src/ge/common/properties_manager.cc
+++ b/src/ge/common/properties_manager.cc
@@ -29,6 +29,9 @@
 #include "common/util.h"

 namespace ge {
+
+static const std::set<std::string> black_list = {"IteratorV2"};
+
 PropertiesManager::PropertiesManager() : is_inited_(false), delimiter("=") {}

 PropertiesManager::~PropertiesManager() {}
@@ -61,7 +64,7 @@ bool PropertiesManager::LoadFileContent(const std::string &file_path) {
   // Normalize the path
   string resolved_file_path = RealPath(file_path.c_str());
   if (resolved_file_path.empty()) {
-    GELOGE(false, "Invalid input file path [%s], make sure that the file path is correct.", file_path.c_str());
+    GE_LOGE("Invalid input file path [%s], make sure that the file path is correct.", file_path.c_str());
     return false;
   }

   std::ifstream fs(resolved_file_path, std::ifstream::in);
@@ -199,6 +202,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> Propertie
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool PropertiesManager::IsLayerNeedDump(const std::string &model,
                                                                                          const std::string &op_name) {
   std::lock_guard<std::mutex> lock(dump_mutex_);
+
+  if (black_list.find(op_name) != black_list.end()) {
+    return false;
+  }
+
   // if dump all
   if (model_dump_properties_map_.find(ge::DUMP_ALL_MODEL) != model_dump_properties_map_.end()) {
     return true;
diff --git a/src/ge/common/thread_pool.h b/src/ge/common/thread_pool.h
index 6a07d61d..ea9227bf 100644
--- a/src/ge/common/thread_pool.h
+++ b/src/ge/common/thread_pool.h
@@ -45,17 +45,20 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ThreadPool {
   template <class Func, class... Args>
   auto commit(Func &&func, Args &&... args) -> std::future<decltype(func(args...))> {
     GELOGD("commit run task enter.");
+    using retType = decltype(func(args...));
+    std::future<retType> fail_future;
     if (is_stoped_.load()) {
       GELOGE(ge::FAILED, "thread pool has been stopped.");
+      return fail_future;
     }
-    using RetType = decltype(func(args...));
     auto bind_func = std::bind(std::forward<Func>(func), std::forward<Args>(args)...);
-    auto task = ge::MakeShared<std::packaged_task<RetType()>>(bind_func);
+    auto task = ge::MakeShared<std::packaged_task<retType()>>(bind_func);
     if (task == nullptr) {
-      GELOGW("Make shared failed.");
+      GELOGE(ge::FAILED, "Make shared failed.");
+      return fail_future;
     }
-    std::future<RetType> future = task->get_future();
+    std::future<retType> future = task->get_future();
     {
       std::lock_guard<std::mutex> lock{m_lock_};
       tasks_.emplace([task]() { (*task)(); });
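[Editor's note] Illustrative aside, not part of the patch. After the thread_pool.h change above, commit() returns a default-constructed (hence invalid) std::future instead of falling through when the pool is already stopped or the packaged task cannot be allocated, so callers should check std::future::valid() before blocking. A minimal usage sketch under that assumption (the pool argument and RunSquare helper are hypothetical):

    #include <future>
    #include "common/thread_pool.h"

    int RunSquare(ge::ThreadPool &pool) {
      std::future<int> fut = pool.commit([](int x) { return x * x; }, 6);
      if (!fut.valid()) {
        // The pool was stopped or the task could not be created; nothing ran.
        return -1;
      }
      return fut.get();  // 36
    }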
diff --git a/src/ge/common/types.cc b/src/ge/common/types.cc
index 36091e76..e36c147f 100644
--- a/src/ge/common/types.cc
+++ b/src/ge/common/types.cc
@@ -16,7 +16,7 @@
 #include "framework/common/types.h"

-#include "cce/dnn.h"
+#include "graph/types.h"

 namespace ge {
 // dump
@@ -29,7 +29,7 @@ const std::string DUMP_FILE_PATH = "path";
 using ge::OpTypeRegistrar;

 namespace ge {
-const int DEFAULT_FORMAT = static_cast<int>(cce::CC_TENSOR_NCHW);
+const int DEFAULT_FORMAT = static_cast<int>(ge::FORMAT_NCHW);
 // Supported public property names
 const std::string PROP_OME_START_TIME = "ome_start_time";  // start time
 const std::string PROP_OME_DUMP_PATH = "ome_dump_path";    // dump path
@@ -47,7 +47,9 @@ const std::string RTS_PROFILE_PATH = "RTS_PATH";
 const std::string PROFILE_STOP_KEY = "stop";
 const std::string PROFILE_STOP_VALUE = "enable";
 const std::map<std::string, std::string> PROFILE_COMPONENT_MAP{
-    {"ome", OME_PROFILE}, {"cce", CCE_PROFILE}, {"runtime", RTS_PROFILE},
+    {"ome", OME_PROFILE},
+    {"cce", CCE_PROFILE},
+    {"runtime", RTS_PROFILE},
 };
 const std::string PROFILE_CONFIG = "config";
@@ -631,6 +633,6 @@ const std::string NODE_NAME_ATOMIC_ADDR_CLEAN = "atomic_addr_clean";
 const uint32_t TRUE_STREAM_ID = 0;
 const uint32_t STREAM_SWITCH_INPUT_NUM = 2;

-const std::string NODE_NAME_GLOBAL_STEP = "global_step";
+const std::string NODE_NAME_GLOBAL_STEP = "ge_global_step";
 const std::string NODE_NAME_GLOBAL_STEP_ASSIGNADD = "global_step_assignadd";
 }; // namespace ge
diff --git a/src/ge/executor/CMakeLists.txt b/src/ge/executor/CMakeLists.txt
index 191b2400..265ae5ee 100755
--- a/src/ge/executor/CMakeLists.txt
+++ b/src/ge/executor/CMakeLists.txt
@@ -26,6 +26,7 @@ file(GLOB PROTO_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
 file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
     "ge_executor.cc"
+    "../common/ge/plugin_manager.cc"
    "../common/profiling/profiling_manager.cc"
    "../graph/execute/graph_execute.cc"
    "../graph/load/graph_loader.cc"
@@ -81,8 +82,6 @@ include_directories(${GE_SOURCE_DIR}/inc/graph)
include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) include_directories(${GE_SOURCE_DIR}/third_party/securec/include) -include_directories(${GE_SOURCE_DIR}/third_party/json/include) -include_directories(${GE_SOURCE_DIR}/third_party/protobuf/src) include_directories(${CMAKE_BINARY_DIR}) include_directories(${CMAKE_BINARY_DIR}/proto/ge) @@ -99,7 +98,6 @@ target_link_libraries(ge_executor ${PROTOBUF_LIBRARY} ${register} ${c_sec} - ${cce} ${runtime} ${slog} ${mmpa} diff --git a/src/ge/executor/ge_executor.cc b/src/ge/executor/ge_executor.cc index 8be9f6c3..555cef07 100644 --- a/src/ge/executor/ge_executor.cc +++ b/src/ge/executor/ge_executor.cc @@ -16,14 +16,11 @@ #include "executor/ge_executor.h" +#include +#include #include - #include -#include "cce/cce.h" -#include "cce/compiler_stub.h" -#include "cce/aicpu_engine.h" -#include "cce/fwk_adpt_struct.h" #include "common/debug/log.h" #include "framework/common/debug/ge_log.h" #include "common/ge/ge_util.h" @@ -142,7 +139,7 @@ Status GeExecutor::Initialize() { GELOGI("current device_id:%d", device_id); Options profiling_options; profiling_options.device_id = device_id; - profiling_options.job_id = 0; + profiling_options.job_id = ""; ProfilingManager::Instance().Init(profiling_options); if (ProfilingManager::Instance().Init(profiling_options) != SUCCESS) { GELOGE(FAILED, "Failed to init profiling."); @@ -252,8 +249,8 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector output_formats; GELOGI("GetInputOutputDescInfo via new ome."); - Status ret = GraphExecutor::GetInputOutputDescInfo(model_id, input_desc_infos, output_desc_infos, - input_formats, output_formats); + Status ret = + GraphExecutor::GetInputOutputDescInfo(model_id, input_desc_infos, output_desc_infos, input_formats, output_formats); if (ret != domi::SUCCESS) { GELOGE(ret, "GetInputOutputDescInfo failed. 
ret = %u", ret); return TransferDomiErrorCode(ret); @@ -370,8 +367,8 @@ Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_da /// @param [out] uint32_t &model_id: identification after model loading /// @return SUCCESS handle successfully / others handle failed /// -Status GeExecutor::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, - size_t mem_size, void *weight_ptr, size_t weight_size) { +Status GeExecutor::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t mem_size, + void *weight_ptr, size_t weight_size) { return GraphLoader::LoadModelFromData(model_id, model_data, dev_ptr, mem_size, weight_ptr, weight_size); } @@ -463,9 +460,7 @@ Status GeExecutor::GetMemAndWeightSize(const void *model_data, size_t model_size return ge::ModelManager::GetModelMemAndWeightSize(model, mem_size, weight_size); } -Status GeExecutor::LoadSingleOp(const std::string &model_name, - const ge::ModelData &model_data, - void *stream, +Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelData &model_data, void *stream, SingleOp **single_op) { return SingleOpManager::GetInstance().GetOpFromModel(model_name, model_data, stream, single_op); } diff --git a/src/ge/ge_local_engine/CMakeLists.txt b/src/ge/ge_local_engine/CMakeLists.txt index f753b307..1db26782 100755 --- a/src/ge/ge_local_engine/CMakeLists.txt +++ b/src/ge/ge_local_engine/CMakeLists.txt @@ -35,11 +35,7 @@ include_directories(${GE_SOURCE_DIR}/inc/external/graph) include_directories(${GE_SOURCE_DIR}/inc/framework) include_directories(${GE_SOURCE_DIR}/inc/graph) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/tee) include_directories(${GE_SOURCE_DIR}/third_party/securec/include) -include_directories(${GE_SOURCE_DIR}/third_party/json/include) -include_directories(${GE_SOURCE_DIR}/third_party/protobuf/src) include_directories(${CMAKE_BINARY_DIR}) include_directories(${CMAKE_BINARY_DIR}/proto/ge) diff --git a/src/ge/ge_local_engine/common/constant/constant.h b/src/ge/ge_local_engine/common/constant/constant.h index f7f1e2cb..c517d267 100644 --- a/src/ge/ge_local_engine/common/constant/constant.h +++ b/src/ge/ge_local_engine/common/constant/constant.h @@ -17,11 +17,13 @@ #ifndef GE_GE_LOCAL_ENGINE_COMMON_CONSTANT_CONSTANT_H_ #define GE_GE_LOCAL_ENGINE_COMMON_CONSTANT_CONSTANT_H_ +#include + namespace ge { namespace ge_local { // engine name -const char *const kGeLocalEngineName = "DNN_VM_GE_LOCAL"; -const char *const kGeLocalOpKernelLibName = "DNN_VM_GE_LOCAL_OP_STORE"; +const char kGeLocalEngineName[] = "DNN_VM_GE_LOCAL"; +const char kGeLocalOpKernelLibName[] = "DNN_VM_GE_LOCAL_OP_STORE"; } // namespace ge_local } // namespace ge diff --git a/src/ge/ge_local_engine/engine/ge_local_engine.cc b/src/ge/ge_local_engine/engine/ge_local_engine.cc index eb3bc8ba..9525e81b 100644 --- a/src/ge/ge_local_engine/engine/ge_local_engine.cc +++ b/src/ge/ge_local_engine/engine/ge_local_engine.cc @@ -15,11 +15,9 @@ */ #include "ge_local_engine/engine/ge_local_engine.h" - #include #include #include - #include "framework/common/debug/ge_log.h" #include "common/ge_inner_error_codes.h" #include "common/constant/constant.h" diff --git a/src/ge/ge_local_engine/engine/ge_local_engine.h b/src/ge/ge_local_engine/engine/ge_local_engine.h index 350b3e5f..e5f9a24d 100644 --- a/src/ge/ge_local_engine/engine/ge_local_engine.h 
+++ b/src/ge/ge_local_engine/engine/ge_local_engine.h @@ -20,7 +20,6 @@ #include #include #include - #include "common/opskernel/ops_kernel_info_store.h" #include "common/optimizer/graph_optimizer.h" @@ -29,57 +28,57 @@ using GraphOptimizerPtr = std::shared_ptr; namespace ge { namespace ge_local { -/// -/// ge local engine. -/// Used for the ops not belong to any engine. eg:netoutput -/// +/** + * ge local engine. + * Used for the ops not belong to any engine. eg:netoutput + */ class GeLocalEngine { public: - /// - /// get GeLocalEngine instance. - /// @return GeLocalEngine instance. - /// + /** + * get GeLocalEngine instance. + * @return GeLocalEngine instance. + */ static GeLocalEngine &Instance(); virtual ~GeLocalEngine() = default; - /// - /// When Ge start, GE will invoke this interface - /// @return The status whether initialize successfully - /// + /** + * When Ge start, GE will invoke this interface + * @return The status whether initialize successfully + */ Status Initialize(const std::map &options); - /// - /// After the initialize, GE will invoke this interface - /// to get the Ops kernel Store. - /// @param ops_kernel_map The ge local's ops kernel info - /// + /** + * After the initialize, GE will invoke this interface + * to get the Ops kernel Store. + * @param ops_kernel_map The ge local's ops kernel info + */ void GetOpsKernelInfoStores(std::map &ops_kernel_map); - /// - /// After the initialize, GE will invoke this interface - /// to get the Graph Optimizer. - /// @param graph_optimizers The ge local's Graph Optimizer objs - /// + /** + * After the initialize, GE will invoke this interface + * to get the Graph Optimizer. + * @param graph_optimizers The ge local's Graph Optimizer objs + */ void GetGraphOptimizerObjs(std::map &graph_optimizers); - /// - /// When the graph finished, GE will invoke this interface - /// @return The status whether initialize successfully - /// + /** + * When the graph finished, GE will invoke this interface + * @return The status whether initialize successfully + */ Status Finalize(); // Copy prohibited - GeLocalEngine(const GeLocalEngine &ge_local_engine) = delete; + GeLocalEngine(const GeLocalEngine &geLocalEngine) = delete; // Move prohibited - GeLocalEngine(const GeLocalEngine &&ge_local_engine) = delete; + GeLocalEngine(const GeLocalEngine &&geLocalEngine) = delete; // Copy prohibited - GeLocalEngine &operator=(const GeLocalEngine &ge_local_engine) = delete; + GeLocalEngine &operator=(const GeLocalEngine &geLocalEngine) = delete; // Move prohibited - GeLocalEngine &operator=(GeLocalEngine &&ge_local_engine) = delete; + GeLocalEngine &operator=(GeLocalEngine &&geLocalEngine) = delete; private: GeLocalEngine() = default; @@ -91,28 +90,28 @@ class GeLocalEngine { extern "C" { -/// -/// When Ge start, GE will invoke this interface -/// @return The status whether initialize successfully -/// +/** + * When Ge start, GE will invoke this interface + * @return The status whether initialize successfully + */ ge::Status Initialize(const map &options); -/// -/// After the initialize, GE will invoke this interface to get the Ops kernel Store -/// @param ops_kernel_map The ge local's ops kernel info -/// +/** + * After the initialize, GE will invoke this interface to get the Ops kernel Store + * @param ops_kernel_map The ge local's ops kernel info + */ void GetOpsKernelInfoStores(std::map &ops_kernel_map); -/// -/// After the initialize, GE will invoke this interface to get the Graph Optimizer -/// @param graph_optimizers The ge local's Graph Optimizer 
objs
-///
+/**
+ * After initialization, GE will invoke this interface to get the graph optimizers.
+ * @param graph_optimizers The GE local engine's graph optimizer objects
+ */
 void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers);
 
-///
-/// When the graph finished, GE will invoke this interface
-/// @return The status whether initialize successfully
-///
+/**
+ * When the graph is finished, GE will invoke this interface.
+ * @return The status of whether finalization succeeded
+ */
 ge::Status Finalize();
 }
diff --git a/src/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc b/src/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc
index 7b354c86..0e5844b8 100644
--- a/src/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc
+++ b/src/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc
@@ -15,21 +15,19 @@
  */
 
 #include "ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h"
-
 #include
-
 #include "common/constant/constant.h"
 #include "framework/common/debug/ge_log.h"
 #include "common/ge_inner_error_codes.h"
 #include "common/ge/ge_util.h"
 #include "graph/utils/tensor_utils.h"
 #include "graph/utils/type_utils.h"
 #include "op/op_factory.h"
 #include "proto/task.pb.h"
 
 namespace {
-const char *const kConstantOpType = "Constant";
-const char *const kConstantOpAttrName = "value";
+const char kConstantOpType[] = "Constant";
+const char kConstantOpAttrName[] = "value";
 }  // namespace
 
 namespace ge {
 namespace ge_local {
@@ -113,10 +113,10 @@ Status GeLocalOpsKernelInfoStore::CalcOpRunningParam(Node &ge_node) {
     return FAILED;
   }
   GELOGI(
-      "Calc op[%s:%s] out[%zu] mem size is %ld,"
-      " format=%s, data_type=%s.",
-      node_name.c_str(), node_type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(),
-      TypeUtils::DataTypeToSerialString(data_type).c_str());
+    "Calc op[%s:%s] out[%zu] mem size is %ld,"
+    " format=%s, data_type=%s.",
+    node_name.c_str(), node_type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(),
+    TypeUtils::DataTypeToSerialString(data_type).c_str());
 
   if (output_mem_size > static_cast<int64_t>(UINT_MAX)) {
     GELOGE(FAILED,
@@ -176,7 +176,7 @@ Status GeLocalOpsKernelInfoStore::GenerateTask(const Node &node, RunContext &context,
     GELOGE(ret, "Node:%s(%s) op run failed.", name.c_str(), type.c_str());
     return ret;
   }
-  GELOGI("Ge local generate task for node:%s(%s) end, tasks.size()=%zu.", name.c_str(), type.c_str(), tasks.size());
+  GELOGD("Ge local generate task for node:%s(%s) end, tasks.size()=%zu.", name.c_str(), type.c_str(), tasks.size());
   return ret;
 }
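
The CalcOpRunningParam hunk above logs a signed 64-bit output_mem_size and then rejects any value above UINT_MAX, because the size fields handed to the runtime are 32-bit. A minimal sketch of that overflow-guard pattern, using plain dims and an element size instead of GE's tensor utilities; the function name and signature here are illustrative:

    #include <climits>
    #include <cstdint>
    #include <vector>

    // Returns false instead of silently truncating when the product of the
    // dimensions and the element size cannot be represented in a uint32_t.
    bool CalcOutputMemSize(const std::vector<int64_t> &dims, int64_t bytes_per_elem,
                           uint32_t &mem_size) {
      if (bytes_per_elem <= 0) {
        return false;  // reject nonsensical element sizes up front
      }
      int64_t size = bytes_per_elem;
      for (int64_t d : dims) {
        if (d < 0 || (d != 0 && size > INT64_MAX / d)) {
          return false;  // negative dim, or the multiplication would overflow int64
        }
        size *= d;
      }
      if (size > static_cast<int64_t>(UINT_MAX)) {
        return false;  // the same guard as the hunk: the result must fit in 32 bits
      }
      mem_size = static_cast<uint32_t>(size);
      return true;
    }
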
diff --git a/src/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h b/src/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h
index a04367ff..ce123751 100644
--- a/src/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h
+++ b/src/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h
@@ -31,61 +31,61 @@ class GeLocalOpsKernelInfoStore : public OpsKernelInfoStore {
 
   ~GeLocalOpsKernelInfoStore() override = default;
 
-  ///
-  /// Initialize related resources of the ge local kernelinfo store
-  /// @return status whether this operation success
-  ///
+  /**
+   * Initialize related resources of the GE local kernel info store.
+   * @return status indicating whether this operation succeeded
+   */
   Status Initialize(const std::map<std::string, std::string> &options) override;
 
-  ///
-  /// Release related resources of the ge local kernel info store
-  /// @return status whether this operation success
-  ///
+  /**
+   * Release related resources of the GE local kernel info store.
+   * @return status indicating whether this operation succeeded
+   */
   Status Finalize() override;
 
-  ///
-  /// Check to see if an operator is fully supported or partially supported.
-  /// @param op_desc OpDesc information
-  /// @param reason unsupported reason
-  /// @return bool value indicate whether the operator is fully supported
-  ///
+  /**
+   * Check to see if an operator is fully supported or partially supported.
+   * @param op_desc OpDesc information
+   * @param reason unsupported reason
+   * @return bool value indicating whether the operator is fully supported
+   */
   bool CheckSupported(const OpDescPtr &op_desc, std::string &reason) const override;
 
-  ///
-  /// Returns the full operator information.
-  /// @param infos reference of a map,
-  /// contain operator's name and detailed information
-  ///
+  /**
+   * Returns the full operator information.
+   * @param infos reference of a map containing each
+   * operator's name and detailed information
+   */
   void GetAllOpsKernelInfo(std::map<std::string, OpInfo> &infos) const override;
 
-  ///
-  /// Calc the running size of Operator,
-  /// then GE will alloc the mem size from runtime
-  /// @param ge_node Node information
-  /// @return status whether this operation success
-  ///
+  /**
+   * Calc the running size of an operator;
+   * GE will then alloc that mem size from the runtime.
+   * @param ge_node Node information
+   * @return status indicating whether this operation succeeded
+   */
   Status CalcOpRunningParam(ge::Node &ge_node) override;
 
-  ///
-  /// call the runtime's interface to generate the task
-  /// @param node Node information
-  /// @param context run context info
-  /// @return status whether this operation success
-  ///
+  /**
+   * Call the runtime's interface to generate the task.
+   * @param node Node information
+   * @param context run context info
+   * @return status indicating whether this operation succeeded
+   */
   Status GenerateTask(const ge::Node &ge_node, ge::RunContext &context, std::vector<domi::TaskDef> &tasks) override;
 
-  ///
-  /// Create session
-  /// @param session_options Session Options
-  /// @return status whether this operation success
-  ///
+  /**
+   * Create session.
+   * @param session_options Session options
+   * @return status indicating whether this operation succeeded
+   */
   Status CreateSession(const std::map<std::string, std::string> &session_options) override;
 
-  ///
-  /// Destroy session
-  /// @param session_options Session Options
-  /// @return status whether this operation success
-  ///
+  /**
+   * Destroy session.
+   * @param session_options Session options
+   * @return status indicating whether this operation succeeded
+   */
   Status DestroySession(const std::map<std::string, std::string> &session_options) override;
 
   // Copy prohibited
@@ -101,12 +101,12 @@ class GeLocalOpsKernelInfoStore : public OpsKernelInfoStore {
   GeLocalOpsKernelInfoStore &operator=(GeLocalOpsKernelInfoStore &&ops_kernel_store) = delete;
 
  private:
-  ///
-  /// Calc memSize for constant which type is DT_STRING.
-  /// @param op_desc OpDesc information
-  /// @param mem_size output size
-  /// @return whether this operation success
-  ///
+  /**
+   * Calc the mem size for a constant whose type is DT_STRING.
+   * @param op_desc OpDesc information
+   * @param mem_size output size
+   * @return whether this operation succeeded
+   */
   Status CalcConstantStrMemSize(const OpDescPtr &op_desc, int64_t &mem_size);
 
   // store op name and OpInfo key-value pair
diff --git a/src/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc b/src/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc
index f0535331..8c68abbd 100644
--- a/src/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc
+++ b/src/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc
@@ -15,7 +15,6 @@
  */
 
 #include "ge_local_engine/ops_kernel_store/op/ge_deleted_op.h"
-
 #include "framework/common/debug/ge_log.h"
 #include "common/ge_inner_error_codes.h"
 #include "ge_local_engine/ops_kernel_store/op/op_factory.h"
diff --git a/src/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h b/src/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h
index c551779d..ebaeef2d 100644
--- a/src/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h
+++ b/src/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h
@@ -31,10 +31,10 @@ class GeDeletedOp : public Op {
 
   GeDeletedOp(const GeDeletedOp &op) = delete;
 
-  ///
-  /// @brief generate task.
-  /// @return result
-  ///
+  /**
+   * @brief Generate task.
+   * @return result
+   */
   ge::Status Run() override;
 };
 }  // namespace ge_local
diff --git a/src/ge/ge_local_engine/ops_kernel_store/op/no_op.cc b/src/ge/ge_local_engine/ops_kernel_store/op/no_op.cc
index 8b44aea7..5bbec472 100644
--- a/src/ge/ge_local_engine/ops_kernel_store/op/no_op.cc
+++ b/src/ge/ge_local_engine/ops_kernel_store/op/no_op.cc
@@ -15,7 +15,6 @@
  */
 
 #include "ge_local_engine/ops_kernel_store/op/no_op.h"
-
 #include "framework/common/debug/ge_log.h"
 #include "common/ge_inner_error_codes.h"
 #include "ge_local_engine/ops_kernel_store/op/op_factory.h"
@@ -25,7 +24,6 @@ namespace ge_local {
 NoOp::NoOp(const Node &node, RunContext &run_context) : Op(node, run_context) {}
 
 Status NoOp::Run() {
-  GELOGI("Node:%s type is %s, no need gen task.", name_.c_str(), type_.c_str());
   // Do nothing
   return SUCCESS;
 }
diff --git a/src/ge/ge_local_engine/ops_kernel_store/op/no_op.h b/src/ge/ge_local_engine/ops_kernel_store/op/no_op.h
index 80f235dc..31199b25 100644
--- a/src/ge/ge_local_engine/ops_kernel_store/op/no_op.h
+++ b/src/ge/ge_local_engine/ops_kernel_store/op/no_op.h
@@ -31,10 +31,10 @@ class NoOp : public Op {
 
   NoOp(const NoOp &op) = delete;
 
-  ///
-  /// @brief generate task.
-  /// @return result
-  ///
+  /**
+   * @brief Generate task.
+   * @return result
+   */
   ge::Status Run() override;
 };
 }  // namespace ge_local
diff --git a/src/ge/ge_local_engine/ops_kernel_store/op/op.cc b/src/ge/ge_local_engine/ops_kernel_store/op/op.cc
index 2405391a..0a5625de 100644
--- a/src/ge/ge_local_engine/ops_kernel_store/op/op.cc
+++ b/src/ge/ge_local_engine/ops_kernel_store/op/op.cc
@@ -15,7 +15,9 @@
  */
 
 #include "ge_local_engine/ops_kernel_store/op/op.h"
-
+#include "framework/common/debug/ge_log.h"
+#include "common/ge_inner_error_codes.h"
+#include "graph/op_desc.h"
 #include "graph/utils/anchor_utils.h"
 #include "graph/utils/tensor_utils.h"
diff --git a/src/ge/ge_local_engine/ops_kernel_store/op/op.h b/src/ge/ge_local_engine/ops_kernel_store/op/op.h
index 71fda167..1b184dad 100644
--- a/src/ge/ge_local_engine/ops_kernel_store/op/op.h
+++ b/src/ge/ge_local_engine/ops_kernel_store/op/op.h
@@ -18,19 +18,17 @@
 #define GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_OP_OP_H_
 
 #include
-
 #include
 #include
-
 #include "common/ge_inner_error_codes.h"
 #include "common/opskernel/ops_kernel_info_types.h"
 #include "graph/node.h"
 
 namespace ge {
 namespace ge_local {
-///
-/// The base class for all op.
-///
+/**
+ * The base class for all ops.
+ */
 class Op {
  public:
   Op(const Node &node, RunContext &run_context);
diff --git a/src/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc b/src/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc
index d9d803da..49fc1084 100644
--- a/src/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc
+++ b/src/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc
@@ -15,7 +15,6 @@
  */
 
 #include "ge_local_engine/ops_kernel_store/op/op_factory.h"
-
 #include "framework/common/debug/ge_log.h"
 #include "common/ge_inner_error_codes.h"
 #include "graph/op_desc.h"
diff --git a/src/ge/ge_local_engine/ops_kernel_store/op/op_factory.h b/src/ge/ge_local_engine/ops_kernel_store/op/op_factory.h
index d7e9a2a1..6d0c16f4 100644
--- a/src/ge/ge_local_engine/ops_kernel_store/op/op_factory.h
+++ b/src/ge/ge_local_engine/ops_kernel_store/op/op_factory.h
@@ -22,7 +22,6 @@
 #include
 #include
 #include
-
 #include "common/ge/ge_util.h"
 #include "ge_local_engine/ops_kernel_store/op/op.h"
 
@@ -30,25 +29,27 @@ namespace ge {
 namespace ge_local {
 using OP_CREATOR_FUNC = std::function<std::shared_ptr<Op>(const Node &, RunContext &)>;
 
-// manage all the op, support create op.
+/**
+ * Manages all ops and supports op creation.
+ */
 class OpFactory {
  public:
   static OpFactory &Instance();
 
-  ///
-  /// @brief create Op.
-  /// @param [in] node share ptr of node
-  /// @param [in] run_context run context
-  /// @return not nullptr success
-  /// @return nullptr fail
-  ///
+  /**
+   * @brief Create an Op.
+   * @param [in] node reference of the node
+   * @param [in] run_context run context
+   * @return not nullptr success
+   * @return nullptr fail
+   */
   std::shared_ptr<Op> CreateOp(const Node &node, RunContext &run_context);
 
-  ///
-  /// @brief Register Op create function.
-  /// @param [in] type Op type
-  /// @param [in] func Op create func
-  ///
+  /**
+   * @brief Register an Op creator function.
+   * @param [in] type Op type
+   * @param [in] func Op create func
+   */
   void RegisterCreator(const std::string &type, const OP_CREATOR_FUNC &func);
 
   const std::vector<std::string> &GetAllOps() const { return all_ops_; }
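
OpFactory above exposes CreateOp next to RegisterCreator, which is the usual static-registrar factory pattern: a file-scope object's constructor runs before main() and installs a creator for its op type. A self-contained sketch with stand-in Node, RunContext and Op types; GE's actual registration macro is not shown in this patch, so the registrar object here is illustrative:

    #include <functional>
    #include <map>
    #include <memory>
    #include <string>
    #include <utility>

    struct Node {};
    struct RunContext {};
    struct Op { virtual ~Op() = default; };

    using OpCreator = std::function<std::shared_ptr<Op>(const Node &, RunContext &)>;

    class Factory {
     public:
      static Factory &Instance() {
        static Factory instance;
        return instance;
      }
      void RegisterCreator(const std::string &type, OpCreator func) {
        creators_[type] = std::move(func);
      }
      std::shared_ptr<Op> CreateOp(const std::string &type, const Node &node, RunContext &ctx) {
        auto it = creators_.find(type);
        return it == creators_.end() ? nullptr : it->second(node, ctx);
      }

     private:
      std::map<std::string, OpCreator> creators_;
    };

    struct NoOp : Op {};

    // The constructor of this file-scope object performs the registration at
    // static-initialization time, before main() runs.
    static const struct NoOpRegistrar {
      NoOpRegistrar() {
        Factory::Instance().RegisterCreator("NoOp", [](const Node &, RunContext &) {
          return std::make_shared<NoOp>();
        });
      }
    } g_no_op_registrar;
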
diff --git a/src/ge/ge_runtime/CMakeLists.txt b/src/ge/ge_runtime/CMakeLists.txt
index 9e9892af..b914b21b 100755
--- a/src/ge/ge_runtime/CMakeLists.txt
+++ b/src/ge/ge_runtime/CMakeLists.txt
@@ -27,7 +27,6 @@ include_directories(${GE_SOURCE_DIR}/inc/framework/ge_runtime)
 include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc)
 include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce)
 include_directories(${GE_SOURCE_DIR}/third_party/securec/include)
-include_directories(${GE_SOURCE_DIR}/third_party/protobuf/src)
 include_directories(${CMAKE_BINARY_DIR})
 include_directories(${CMAKE_BINARY_DIR}/proto/ge)
 
@@ -35,7 +34,6 @@ include_directories(${CMAKE_BINARY_DIR}/proto/ge)
 file(GLOB_RECURSE GE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
     "model_runner.cc"
     "runtime_model.cc"
-    "op_info_utils.cc"
     "output.cc"
     "task/*.cc"
     )
@@ -46,7 +44,6 @@ target_compile_definitions(ge_runtime PUBLIC Werror)
 target_link_libraries(ge_runtime
     graph
-    ${cce}
     ${slog}
     ${runtime}
     ${c_sec}
diff --git a/src/ge/ge_runtime/model_context.h b/src/ge/ge_runtime/model_context.h
index 4f580b61..259ff91f 100644
--- a/src/ge/ge_runtime/model_context.h
+++ b/src/ge/ge_runtime/model_context.h
@@ -18,7 +18,6 @@
 #define GE_GE_RUNTIME_MODEL_CONTEXT_H_
 
 #include <vector>
-
 #include "runtime/rt_model.h"
 
 namespace ge {
@@ -28,8 +27,13 @@ class ModelContext {
   ModelContext(uint32_t device_id, uint64_t session_id, int32_t priority, rtModel_t rt_model_handle,
                rtStream_t rt_model_stream, const std::vector<rtStream_t> &stream_list,
                const std::vector<rtLabel_t> &label_list, const std::vector<rtEvent_t> &event_list)
-      : device_id_(device_id), session_id_(session_id), priority_(priority), rt_model_handle_(rt_model_handle),
-        rt_model_stream_(rt_model_stream), stream_list_(stream_list), label_list_(label_list),
+      : device_id_(device_id),
+        session_id_(session_id),
+        priority_(priority),
+        rt_model_handle_(rt_model_handle),
+        rt_model_stream_(rt_model_stream),
+        stream_list_(stream_list),
+        label_list_(label_list),
         event_list_(event_list) {}
 
   ~ModelContext() {}
diff --git a/src/ge/ge_runtime/model_runner.cc b/src/ge/ge_runtime/model_runner.cc
index e36901c8..59952e39 100644
--- a/src/ge/ge_runtime/model_runner.cc
+++ b/src/ge/ge_runtime/model_runner.cc
@@ -15,8 +15,6 @@
  */
 
 #include "ge_runtime/model_runner.h"
-
-#include "./op_info_utils.h"
 #include "./runtime_model.h"
 #include "framework/common/debug/ge_log.h"
 #include "common/ge_inner_error_codes.h"
@@ -35,9 +33,7 @@ ModelRunner &ModelRunner::Instance() {
   return instance;
 }
 
-bool ModelRunner::LoadDavinciModel(uint32_t device_id,
-                                   uint64_t session_id,
-                                   uint32_t model_id,
+bool ModelRunner::LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint32_t model_id,
                                    std::shared_ptr<DavinciModel> davinci_model,
                                    std::shared_ptr<ModelListener> listener) {
   std::shared_ptr<RuntimeModel> model = MakeShared<RuntimeModel>();
@@ -100,12 +96,10 @@ bool ModelRunner::RunModel(uint32_t model_id, const InputData &input_data, OutputData *output_data) {
   return true;
 }
 
-bool ModelRunner::GetInputOutputDescInfo(uint32_t model_id,
-                                         bool zero_copy,
+bool ModelRunner::GetInputOutputDescInfo(uint32_t model_id, bool zero_copy,
                                          std::vector<InputOutputDescInfo> *input_desc,
                                          std::vector<InputOutputDescInfo> *output_desc,
-                                         std::vector<uint32_t> *input_format,
-                                         std::vector<uint32_t> *output_format) {
+                                         std::vector<uint32_t> *input_format, std::vector<uint32_t> *output_format) {
   if (runtime_models_.find(model_id) == runtime_models_.end()) {
     GELOGE(PARAM_INVALID, "Model id %u not found.", model_id);
     return
false; diff --git a/src/ge/ge_runtime/op_info_utils.cc b/src/ge/ge_runtime/op_info_utils.cc deleted file mode 100644 index 2decc0a2..00000000 --- a/src/ge/ge_runtime/op_info_utils.cc +++ /dev/null @@ -1,632 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ge_runtime/op_info_utils.h" - -#include -#include - -#include "common/ge_inner_error_codes.h" -#include "common/types.h" -#include "common/util.h" -#include "framework/common/debug/ge_log.h" -#include "runtime/rt_model.h" - -namespace ge { -namespace model_runner { -static const uint32_t kDimMaxSize = 8; -static const uint32_t kPoolMaskDescDimSize = 6; -static const uint32_t kPoolMaskDescWinH = 4; -static const uint32_t kPoolMaskDescWinW = 5; - -bool OpInfoUtils::NeedTransFilter(const std::shared_ptr &data_info) { - if (data_info == nullptr) { - GELOGE(PARAM_INVALID, "data info is null."); - return false; - } - - if (data_info->input_tensors.empty()) { - GELOGE(PARAM_INVALID, "data info input tensors is empty."); - return false; - } - - return static_cast(data_info->input_tensors[0].format) == FORMAT_FILTER_HWCK || - static_cast(data_info->input_tensors[0].format) == FORMAT_HWCN; -} - -bool OpInfoUtils::TransFilterData(const std::shared_ptr &data_info, const void *in_data, uint32_t length) { - GELOGI("Start trans filter data."); - if (data_info == nullptr) { - GELOGE(PARAM_INVALID, "data info ptr is null."); - return false; - } - - if (data_info->input_tensors.empty() || data_info->output_tensors.empty()) { - GELOGE(PARAM_INVALID, "data info input tensors size %zu, output tensor size %zu.", data_info->input_tensors.size(), - data_info->output_tensors.size()); - return false; - } - - if (in_data == nullptr) { - GELOGE(PARAM_INVALID, "In data ptr is null."); - return false; - } - - // Transform to KCHW - GELOGI("copy filter data op: %s, need transfer.", data_info->name.c_str()); - data_info->input_tensors[0].format = static_cast(FORMAT_NCHW); - data_info->input_tensors[0].datatype = static_cast(DT_FLOAT); - data_info->input_tensors[0].dims = std::vector( - {data_info->input_tensors[0].GetDim(kHwckDimK), data_info->input_tensors[0].GetDim(kHwckDimC), - data_info->input_tensors[0].GetDim(kHwckDimH), data_info->input_tensors[0].GetDim(kHwckDimW)}); - - void *out_data = nullptr; - auto total_size = static_cast(data_info->input_tensors[0].GetShapeSize() * sizeof(float)); - if (total_size != length) { - GELOGE(FAILED, "Input filter data length(%u) not correct,need:%u!", length, total_size); - return false; - } - TransDataHWCK2KCHW(in_data, data_info->input_tensors[0].GetDim(kHwckDimH), - data_info->input_tensors[0].GetDim(kHwckDimW), data_info->input_tensors[0].GetDim(kHwckDimC), - data_info->input_tensors[0].GetDim(kHwckDimK), &out_data); - - // Transform to FracZ - // using namespace cce; - cce::ccFilterDescriptor_t input_desc = nullptr; - GE_MAKE_GUARD(input_desc, [&] { - if (input_desc) GE_CHK_CCE(cce::ccDestroyFilterDescriptor(&input_desc)); - }); - 
cce::ccFilterDescriptor_t output_desc = nullptr; - GE_MAKE_GUARD_FILTER_DESC(output_desc); - bool ret = InitFilterTensorDescriptor(data_info->input_tensors[0].dims, data_info->input_tensors[0].format, - data_info->input_tensors[0].datatype, input_desc); - if (!ret) { - delete[] reinterpret_cast(out_data); - out_data = nullptr; - DestroyFilterDescriptor(input_desc); - GELOGE(INTERNAL_ERROR, "InitTensorDescriptor input_desc failed."); - return false; - } - - ret = InitFilterTensorDescriptor(data_info->output_tensors[0].dims, data_info->input_tensors[0].format, - data_info->input_tensors[0].datatype, output_desc); - if (!ret) { - delete[] reinterpret_cast(out_data); - out_data = nullptr; - DestroyFilterDescriptor(output_desc); - DestroyFilterDescriptor(input_desc); - GELOGE(INTERNAL_ERROR, "InitTensorDescriptor output_desc failed."); - return false; - } - - void *fp16_data_addr = nullptr; - uint32_t output_size = data_info->output_tensors[0].size; - - rtError_t rt_ret = rtMallocHost(&fp16_data_addr, output_size); - if (rt_ret != RT_ERROR_NONE) { - delete[] reinterpret_cast(out_data); - out_data = nullptr; - DestroyFilterDescriptor(output_desc); - DestroyFilterDescriptor(input_desc); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return false; - } - GE_MAKE_GUARD_RTMEM(fp16_data_addr); - - cce::ccStatus_t cc_ret = cce::ccTransFilter(input_desc, out_data, output_desc, fp16_data_addr, output_size); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - delete[] reinterpret_cast(out_data); - out_data = nullptr; - DestroyFilterDescriptor(output_desc); - DestroyFilterDescriptor(input_desc); - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); - return false; - } - - delete[] reinterpret_cast(out_data); - out_data = nullptr; - - // Copy input data to data node - const std::vector &outputs = data_info->output_addrs; - if (outputs.empty()) { - GELOGE(PARAM_INVALID, "data_info %s output_addrs is empty.", data_info->name.c_str()); - return false; - } - - rt_ret = rtMemcpy(reinterpret_cast(outputs[0]), output_size, fp16_data_addr, output_size, - RT_MEMCPY_HOST_TO_DEVICE); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return false; - } - - GELOGI("Filter data op transdata end."); - return true; -} - -bool OpInfoUtils::InitFilterTensorDescriptor(const std::vector &dims, uint32_t format, uint32_t dtype, - cce::ccFilterDescriptor_t &cc_tensor) { - if (dims.empty()) { - GELOGE(FAILED, "Invalid dim size"); - return false; - } - cce::ccTensorFormat_t cc_format = cce::tagCcTensorFormat(format); - cce::ccDataType_t data_type = cce::tagCcDataType(dtype); - if (cc_format != cce::CC_TENSOR_NCHW && cc_format != cce::CC_TENSOR_FRACTAL_Z && cc_format != cce::CC_TENSOR_HWCN) { - GELOGE(PARAM_INVALID, "Filter tensor cc_format:%u not correct.", format); - return false; - } - if (dims.size() <= static_cast(kNchwDimW)) { - GELOGE(PARAM_INVALID, "Array index is invalid!"); - return false; - } - - // Create tensor descriptor - cce::ccStatus_t cc_ret = cce::ccCreateFilterDescriptor(&cc_tensor); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); - return false; - } - - if (cc_format == cce::CC_TENSOR_FRACTAL_Z) { - cc_ret = cce::ccSetFilterFractalDescriptor( - cc_tensor, cc_format, data_type, static_cast(dims[kNchwDimN]), static_cast(dims[kNchwDimC]), - static_cast(dims[kNchwDimH]), static_cast(dims[kNchwDimW])); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: 
0x%X", cc_ret); - return false; - } - } else if (cc_format == cce::CC_TENSOR_HWCN) { - cc_ret = cce::ccSetFilterFractalDescriptor( - cc_tensor, cc_format, data_type, static_cast(dims[kNchwDimW]), static_cast(dims[kNchwDimH]), - static_cast(dims[kNchwDimN]), static_cast(dims[kNchwDimC])); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); - return false; - } - } else { - cc_ret = cce::ccSetFilterFractalDescriptor( - cc_tensor, cc_format, data_type, static_cast(dims[kNchwDimN]), static_cast(dims[kNchwDimC]), - static_cast(dims[kNchwDimH]), static_cast(dims[kNchwDimW])); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); - return false; - } - } - return true; -} - -void OpInfoUtils::DestroyFilterDescriptor(cce::ccFilterDescriptor_t &cc_filter) { - if (cc_filter != nullptr) { - cce::ccStatus_t cc_ret = ccDestroyFilterDescriptor(&cc_filter); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "ccDestroyFilterDescriptor failed. ret = %d", static_cast(cc_ret)); - } - - cc_filter = nullptr; - } -} - -void OpInfoUtils::DestroyTensorDescriptor(cce::ccTensorDescriptor_t &cc_tensor) { - if (cc_tensor != nullptr) { - cce::ccStatus_t cc_ret = cce::ccDestroyTensorDescriptor(&cc_tensor); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); - return; - } - cc_tensor = nullptr; - } -} - -bool OpInfoUtils::IsInputTensorNeedTrans(const std::shared_ptr &data_info) { - if (data_info == nullptr) { - GELOGE(PARAM_INVALID, "data info is null."); - return false; - } - - if (data_info->input_tensors.empty() || data_info->output_tensors.empty()) { - GELOGE(PARAM_INVALID, "data info input tensors size %zu, output tensor size %zu.", data_info->input_tensors.size(), - data_info->output_tensors.size()); - return false; - } - - if (static_cast(data_info->output_tensors[0].format) == FORMAT_NC1HWC0 && - static_cast(data_info->output_tensors[0].datatype) == DT_INT8) { - // AIPP input,Consider compatibility and judge according to this condition. 
- // Add attribute in data node to mark whether it is AIPP - return false; - } - - return data_info->input_tensors[0].format != data_info->output_tensors[0].format || - data_info->input_tensors[0].datatype != data_info->output_tensors[0].datatype; -} - -void OpInfoUtils::TransDataHWCK2KCHW(const void *input, int64_t H, int64_t W, int64_t C, int64_t K, void **output) { - if (input == nullptr) { - return; - } - if (output == nullptr) { - return; - } - const char *w_data = reinterpret_cast(input); - - int64_t count = H * W * C * K; - if (count <= 0) { - GELOGE(PARAM_INVALID, "Count value must be greater than 0, but count = %ld", count); - return; - } - - float *buf = new (std::nothrow) float[count](); - if (buf == nullptr) { - GELOGE(MEMALLOC_FAILED, "Buf must not be null."); - return; - } - - const float *src_buff = nullptr; - float *dst_buff = nullptr; - for (int64_t h = 0; h < H; ++h) { - for (int64_t w = 0; w < W; ++w) { - for (int64_t c = 0; c < C; ++c) { - for (int64_t k = 0; k < K; ++k) { - src_buff = reinterpret_cast(w_data) + ((h * W * C * K) + (w * C * K) + (c * K) + (k)); - dst_buff = buf + ((k * C * H * W) + (c * H * W) + (h * W) + (w)); - *dst_buff = *src_buff; - } - } - } - } - *output = buf; -} - -bool OpInfoUtils::IsComputDimsSize(const uint32_t format, const uint32_t real_dim_cnt) { - return ((format == static_cast(cce::CC_TENSOR_ND)) || - ((format != static_cast(cce::CC_TENSOR_NC1KHKWHWC0)) && - (format != static_cast(cce::CC_TENSOR_C1HWNCoC0)) && - (real_dim_cnt > static_cast(DIM_DEFAULT_SIZE)))); -} - -static const auto set_real_dim_cnt = [](uint32_t real_dim_cnt, const std::vector &dim) { - return static_cast(((real_dim_cnt == 0) && (dim.size() > DIM_DEFAULT_SIZE)) ? dim.size() - : real_dim_cnt); -}; - -bool OpInfoUtils::InitTensorDescriptor(uint32_t format, uint32_t data_type, const std::vector &dim, - cce::ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt) { - cce::ccDataType_t data_type_ = cce::tagCcDataType(data_type); - - real_dim_cnt = set_real_dim_cnt(real_dim_cnt, dim); - - if (IsComputDimsSize(format, real_dim_cnt)) { - // (Format is ND) or (Dimension is greater than 4 and format is not NC1KHKWHWC0 or C1HWNCoC0) - return InitTensorNdDescriptor(data_type, dim, cc_tensor, real_dim_cnt); - } else if (format == static_cast(cce::CC_TENSOR_NC1KHKWHWC0)) { - return InitTensorPoolingMaskDescriptor(format, data_type, dim, cc_tensor, real_dim_cnt); - } else if (format == static_cast(cce::CC_TENSOR_C1HWNCoC0)) { - return InitTensor6dDescriptor(format, data_type, dim, cc_tensor, real_dim_cnt); - } - std::vector dim_vector; - TransferDim(dim, dim_vector); - - if (!CheckParam(format, data_type, dim_vector)) { - GELOGE(PARAM_INVALID, "Check param fail."); - return false; - } - - // Create tensor descriptor - cce::ccStatus_t cc_ret = cce::ccCreateTensorDescriptor(&cc_tensor); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); - return false; - } - - // The last two outputs of fusedbatchnormgrad are 0. 
The special processing of fusedbatchnormgrad - if (dim.size() == 1 && dim[0] == 0) { - (void)cce::ccSetTensorRealDimCnt(cc_tensor, real_dim_cnt); - (void)cce::ccDestroyTensorDescriptor(&cc_tensor); - cc_tensor = nullptr; - return false; - } - - if (format >= static_cast(cce::CC_TENSOR_HASHTABLE_LOOKUP_LOOKUPS) && - format <= static_cast(cce::CC_TENSOR_HASHTABLE_LOOKUP_HITS)) { - int32_t dims[dim.size()]; - for (size_t i = 0; i < dim.size(); ++i) { - dims[i] = static_cast(dim[i]); - } - - cc_ret = cce::ccSetTensorNdDescriptor(cc_tensor, data_type_, dim.size(), dims); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: %d", static_cast(cc_ret)); - (void)cce::ccDestroyTensorDescriptor(&cc_tensor); - cc_tensor = nullptr; - return false; - } - - cce::ccTensorFormat_t tensor_format = cce::tagCcTensorFormat(format); - cc_ret = cce::ccSetTensorFormat(cc_tensor, tensor_format); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: %d", static_cast(cc_ret)); - (void)cce::ccDestroyTensorDescriptor(&cc_tensor); - cc_tensor = nullptr; - return false; - } - - cc_ret = cce::ccSetTensorRealDimCnt(cc_tensor, real_dim_cnt); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); - (void)cce::ccDestroyTensorDescriptor(&cc_tensor); - cc_tensor = nullptr; - return false; - } - - return true; - } else if (format == static_cast(cce::CC_TENSOR_NHWC)) { - return InitTensor4dDescriptor(format, data_type, cc_tensor, static_cast(dim_vector.at(0)), - static_cast(dim_vector.at(3)), static_cast(dim_vector.at(1)), - static_cast(dim_vector.at(2)), real_dim_cnt); - } else if (format == static_cast(cce::CC_TENSOR_HWCN)) { - return InitTensor4dDescriptor(format, data_type, cc_tensor, static_cast(dim_vector.at(3)), - static_cast(dim_vector.at(2)), static_cast(dim_vector.at(0)), - static_cast(dim_vector.at(1)), real_dim_cnt); - } - - // else default - return InitTensor4dDescriptor(format, data_type, cc_tensor, static_cast(dim_vector.at(0)), - static_cast(dim_vector.at(1)), static_cast(dim_vector.at(2)), - static_cast(dim_vector.at(3)), real_dim_cnt); -} - -void OpInfoUtils::TransferDim(const std::vector &dim, std::vector &dim_vector) { - uint32_t input_shape_size = static_cast(dim.size()); - std::list new_dim_list; - - for (auto dim_temp : dim) { - new_dim_list.push_back(dim_temp); - } - if (input_shape_size > static_cast(DIM_DEFAULT_SIZE)) { - dim_vector = dim; - GELOGI("The size of dim_vector is %u, do not to transfer dim", input_shape_size); - return; - } - switch (input_shape_size) { - case 0: { - new_dim_list.push_back(1); - new_dim_list.push_back(1); - new_dim_list.push_back(1); - new_dim_list.push_back(1); - break; - } - case 1: { - new_dim_list.push_front(1); - new_dim_list.push_back(1); - new_dim_list.push_back(1); - break; - } - case 2: { - new_dim_list.push_front(1); - new_dim_list.push_back(1); - break; - } - case 3: { - new_dim_list.push_front(1); - break; - } - default: {} - } - - dim_vector.clear(); - for (auto new_dim : new_dim_list) { - dim_vector.push_back(new_dim); - } -} - -bool OpInfoUtils::InitTensorNdDescriptor(uint32_t data_type, const std::vector &dim, - cce::ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt) { - cce::ccDataType_t data_type_ = cce::tagCcDataType(data_type); - cce::ccStatus_t cc_ret = cce::ccCreateTensorDescriptor(&cc_tensor); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); - return false; - } - - 
int32_t *real_dim = nullptr; - if (real_dim_cnt > 0) { - real_dim = new (std::nothrow) int32_t[real_dim_cnt]; - if (real_dim == nullptr) { - GELOGE(FAILED, "Failed to malloc memory"); - return false; - } - } - - for (size_t i = 0; i < dim.size(); ++i) { - if (i >= real_dim_cnt || i >= kDimMaxSize) { - break; - } - real_dim[i] = static_cast(dim[i]); - } - - cc_ret = cce::ccSetTensorNdDescriptor(cc_tensor, data_type_, real_dim_cnt, real_dim); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); - (void)cce::ccDestroyTensorDescriptor(&cc_tensor); - cc_tensor = nullptr; - delete[] real_dim; - return false; - } - - delete[] real_dim; - return true; -} - -bool OpInfoUtils::InitTensorPoolingMaskDescriptor(uint32_t format, uint32_t data_type, const std::vector &dim, - cce::ccTensorDescriptor_t &cc_tensor, uint32_t) { - cce::ccStatus_t cc_ret = cce::ccCreatePoolingMaskDescriptor(&cc_tensor); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); - return false; - } - cce::ccTensorFormat_t format_ = cce::tagCcTensorFormat(format); - cce::ccDataType_t data_type_ = cce::tagCcDataType(data_type); - - if (dim.size() != kPoolMaskDescDimSize) { - GELOGE(PARAM_INVALID, "The dim size of format CC_TENSOR_NC1KHKWHWC0 must be 6,dim size id %zu.", dim.size()); - (void)cce::ccDestroyTensorDescriptor(&cc_tensor); - cc_tensor = nullptr; - return false; - } - - cc_ret = cce::ccSetPoolingMaskTensorDescriptor( - cc_tensor, format_, data_type_, static_cast(dim[kNchwDimN]), static_cast(dim[kNchwDimC]), - static_cast(dim[kNchwDimH]), static_cast(dim[kNchwDimW]), - static_cast(dim[kPoolMaskDescWinH]), static_cast(dim[kPoolMaskDescWinW])); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); - (void)cce::ccDestroyTensorDescriptor(&cc_tensor); - cc_tensor = nullptr; - return false; - } - - return true; -} - -bool OpInfoUtils::InitTensor6dDescriptor(uint32_t format, uint32_t data_type, const std::vector &dim, - cce::ccTensorDescriptor_t &cc_tensor, uint32_t) { - cce::ccDataType_t data_type_ = cce::tagCcDataType(data_type); - cce::ccStatus_t cc_ret = cce::ccCreateTensorDescriptor(&cc_tensor); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); - return false; - } - - cce::ccTensorFormat_t format_ = cce::tagCcTensorFormat(format); - if (dim.size() != static_cast(DIM_C1HWNCoC0_SIZE)) { - GELOGE(PARAM_INVALID, "The dim size of format C1HWNCoC0_DIM_SIZE must be 5,dim size id %zu.", dim.size()); - (void)cce::ccDestroyTensorDescriptor(&cc_tensor); - cc_tensor = nullptr; - return false; - } - - cc_ret = cce::ccSetFilter6dDescriptor( - cc_tensor, format_, data_type_, static_cast(dim[C1HWNCoC0_DIM_C1]), - static_cast(dim[C1HWNCoC0_DIM_H]), static_cast(dim[C1HWNCoC0_DIM_W]), - static_cast(dim[C1HWNCoC0_DIM_N]), static_cast(dim[C1HWNCoC0_DIM_Co]), - static_cast(dim[C1HWNCoC0_DIM_C0])); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); - (void)cce::ccDestroyTensorDescriptor(&cc_tensor); - cc_tensor = nullptr; - return false; - } - - return true; -} - -bool OpInfoUtils::InitTensor4dDescriptor(uint32_t format, uint32_t data_type, cce::ccTensorDescriptor_t &cc_tensor, - int32_t n, int32_t c, int32_t h, int32_t w, uint32_t real_dim_cnt) { - cce::ccDataType_t data_type_ = cce::tagCcDataType(data_type); - cce::ccTensorFormat_t format_ = cce::tagCcTensorFormat(format); - auto 
cc_ret = cce::ccSetTensor4dDescriptor(cc_tensor, format_, data_type_, n, c, h, w); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); - (void)cce::ccDestroyTensorDescriptor(&cc_tensor); - cc_tensor = nullptr; - return false; - } - - cc_ret = cce::ccSetTensorRealDimCnt(cc_tensor, real_dim_cnt); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); - (void)cce::ccDestroyTensorDescriptor(&cc_tensor); - cc_tensor = nullptr; - return false; - } - - return true; -} - -bool OpInfoUtils::CheckParam(uint32_t format, uint32_t data_type, const std::vector &dim_vector) { - // format - if (format >= static_cast(cce::CC_TENSOR_RESERVED)) { - GELOGE(PARAM_INVALID, "Not supported format, format = %u", format); - return false; - } - - // data type - if (data_type >= static_cast(cce::CC_DATA_RESERVED)) { - GELOGE(PARAM_INVALID, "Not supported data type, type = %u", data_type); - return false; - } - - // input shape - auto input_shape_size = dim_vector.size(); - if (input_shape_size != static_cast(DIM_DEFAULT_SIZE)) { - GELOGW("input_shape_size is %u", input_shape_size); - } - - return true; -} - -bool OpInfoUtils::GetOutputSize(const std::shared_ptr &op_info, std::vector &output_size_list, - std::vector &output_memory_size_list) { - if (op_info == nullptr) { - GELOGE(PARAM_INVALID, "op info is null."); - return false; - } - - for (size_t i = 0; i < op_info->output_tensors.size(); ++i) { - auto output_desc = op_info->output_tensors[i]; - bool output_tensor = op_info->output_tensors[i].is_output; - - if (output_tensor) { - // Recalculate the size directly using desc of net output op. - cce::ccTensorDescriptor_t cctensor = nullptr; - bool status = InitTensorDescriptor(output_desc.format, output_desc.datatype, output_desc.dims, cctensor, - output_desc.real_dim_cnt); - if (!status) { - GELOGE(FAILED, "InitTensorDescriptor fail."); - return false; - } - // Call the API of CCE to obtain the converted size and other parameters. - uint32_t size = 0; - uint32_t memory_size = 0; - auto cc_ret0 = cce::ccGetTensorSizeInBytes(cctensor, &size); - auto cc_ret1 = cce::ccGetTensorMemorySizeInBytes(cctensor, &memory_size); - DestroyTensorDescriptor(cctensor); - if (cc_ret0 != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "ccGetTensorSizeInBytes fail, ret = 0x%X.", cc_ret0); - return false; - } - if (cc_ret1 != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "ccGetTensorMemorySizeInBytes fail, ret = 0x%X.", cc_ret1); - return false; - } - - output_size_list.push_back(size); - output_memory_size_list.push_back(memory_size); - } - } - - if (output_size_list.size() != output_memory_size_list.size()) { - GELOGE(INTERNAL_ERROR, "Output size list length %zu not equal output memory size list length %zu.", - output_size_list.size(), output_memory_size_list.size()); - return false; - } - - return true; -} - -} // namespace model_runner -} // namespace ge diff --git a/src/ge/ge_runtime/op_info_utils.h b/src/ge/ge_runtime/op_info_utils.h deleted file mode 100644 index 8938f76d..00000000 --- a/src/ge/ge_runtime/op_info_utils.h +++ /dev/null @@ -1,81 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef GE_GE_RUNTIME_OP_INFO_UTILS_H_ -#define GE_GE_RUNTIME_OP_INFO_UTILS_H_ - -#include -#include -#include - -#include "cce/dnn.h" -#include "ge_runtime/op_info.h" -#include "graph/op_desc.h" -#include "common/ge_types.h" -#include "runtime/rt_model.h" - -namespace ge { -namespace model_runner { - -const uint32_t kNchwDimN = 0; -const uint32_t kNchwDimC = 1; -const uint32_t kNchwDimH = 2; -const uint32_t kNchwDimW = 3; - -const uint32_t kNhwcDimN = 0; -const uint32_t kNhwcDimH = 1; -const uint32_t kNhwcDimW = 2; -const uint32_t kNhwcDimC = 3; - -const uint32_t kHwckDimH = 0; -const uint32_t kHwckDimW = 1; -const uint32_t kHwckDimC = 2; -const uint32_t kHwckDimK = 3; - -const string kNetOutPut = "NetOutput"; - -class OpInfoUtils { - public: - static bool InitTensorDescriptor(uint32_t format, uint32_t data_type, const std::vector &dim, - cce::ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt = 0); - static void DestroyTensorDescriptor(cce::ccTensorDescriptor_t &cc_tensor); - static bool NeedTransFilter(const std::shared_ptr &data_info); - static bool TransFilterData(const std::shared_ptr &data_info, const void *in_data, uint32_t length); - static bool IsInputTensorNeedTrans(const std::shared_ptr &data_info); - static bool GetOutputSize(const std::shared_ptr &op_info, std::vector &output_size_list, - std::vector &output_memory_size_list); - - private: - static bool InitFilterTensorDescriptor(const std::vector &dims, uint32_t format, uint32_t dtype, - cce::ccFilterDescriptor_t &cc_tensor); - static void TransDataHWCK2KCHW(const void *input, int64_t H, int64_t W, int64_t C, int64_t K, void **output); - static void DestroyFilterDescriptor(cce::ccFilterDescriptor_t &cc_filter); - static bool IsComputDimsSize(const uint32_t format, const uint32_t real_dim_cnt); - static void TransferDim(const std::vector &dim, std::vector &dim_vector); - static bool InitTensorNdDescriptor(uint32_t data_type, const std::vector &dim, - cce::ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt); - static bool InitTensorPoolingMaskDescriptor(uint32_t format, uint32_t data_type, const std::vector &dim, - cce::ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt); - static bool InitTensor6dDescriptor(uint32_t format, uint32_t data_type, const std::vector &dim, - cce::ccTensorDescriptor_t &cc_tensor, uint32_t real_dim_cnt); - static bool InitTensor4dDescriptor(uint32_t format, uint32_t data_type, cce::ccTensorDescriptor_t &cc_tensor, - int32_t n, int32_t c, int32_t h, int32_t w, uint32_t real_dim_cnt); - static bool CheckParam(uint32_t format, uint32_t data_type, const std::vector &dim_vector); -}; -} // namespace model_runner -} // namespace ge - -#endif // GE_GE_RUNTIME_OP_INFO_UTILS_H_ diff --git a/src/ge/ge_runtime/output.cc b/src/ge/ge_runtime/output.cc index a3922dd9..2f4ade89 100644 --- a/src/ge/ge_runtime/output.cc +++ b/src/ge/ge_runtime/output.cc @@ -15,17 +15,10 @@ */ #include "ge_runtime/output.h" - -#include "./op_info_utils.h" -#include "cce/dnn_base.h" -#include "cce/dnn_base_def.hpp" #include "common/ge_inner_error_codes.h" #include "common/util.h" #include 
"framework/common/debug/ge_log.h" -using cce::ccTensorDescriptor_t; -using cce::ccDestroyTensorDescriptor; - namespace ge { namespace model_runner { Output::Output(const OpInfoPtr &op_info, const std::shared_ptr &model) @@ -81,7 +74,7 @@ bool Output::CopyRslt(OutputData *rslt, uint32_t data_begin, uint32_t &data_inde for (size_t i = 0; i < input_num_; i++) { DataBuffer data_buf = rslt->blobs[data_begin + data_count]; - bool ret = SetDataBuf(data_buf, data_count, i, support_mem_share); + bool ret = SetDataBuf(data_buf, data_begin, data_count, i, support_mem_share); if (!ret) { GELOGE(FAILED, "Copy data to host error. index: %lu", i); return ret; @@ -92,56 +85,9 @@ bool Output::CopyRslt(OutputData *rslt, uint32_t data_begin, uint32_t &data_inde return true; } -bool Output::SetDataBuf(DataBuffer &data_buf, uint32_t &data_count, size_t i, bool support_mem_share) { - if (op_info_ == nullptr) { - GELOGE(FAILED, "op_info_ is null"); - return false; - } - if (data_buf.length == 0) { - ++data_count; - GELOGD("data_buf.length = 0,do not need copy, output op : %s, output tensor index : %zu!", - op_info_->name.c_str(), i); - return true; - } - - ccTensorDescriptor_t cc_tensor_desc = nullptr; - GE_MAKE_GUARD_TENSOR(cc_tensor_desc); - - if (i >= op_info_->input_tensors.size()) { - GELOGE(FAILED, "tensor_info is null"); - return false; - } - - auto tensor_info = op_info_->input_tensors.at(i); - - if (data_buf.isDataSupportMemShare && support_mem_share) { - GELOGI("No need to copy input data, user's output data buffer can be shared."); - } else { - // copy result to Databuf - uint32_t size = v_input_size_[i]; - GELOGI("Tensor data size before: %u", size); - if (!OpInfoUtils::InitTensorDescriptor(tensor_info.format, tensor_info.datatype, tensor_info.dims, - cc_tensor_desc)) { - GELOGE(FAILED, "OpUtils::InitTensorDescriptor tensorDesc failed."); - return false; - } - if (ccGetTensorSizeInBytes(cc_tensor_desc, &size) != CC_STATUS_SUCCESS) { - return false; - } - rtError_t rt_ret = rtMemcpy(data_buf.data, size, v_input_data_addr_[i], size, RT_MEMCPY_DEVICE_TO_HOST); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "rtmemcpy error"); - return false; - } - GELOGI("Tensor data size: %u data_buflength: %u", size, data_buf.length); - OpInfoUtils::DestroyTensorDescriptor(cc_tensor_desc); - } - - ++data_count; - GELOGD("Successfully copy the output tensor memory to buffer, output op : %s, output tensor index : %lu!", - op_info_->name.c_str(), i); - - return false; +bool Output::SetDataBuf(DataBuffer &data_buf, uint32_t data_begin, uint32_t &data_count, size_t i, + bool support_mem_share) { + return true; } } // namespace model_runner diff --git a/src/ge/ge_runtime/output.h b/src/ge/ge_runtime/output.h index 24f86923..1f7f91ee 100644 --- a/src/ge/ge_runtime/output.h +++ b/src/ge/ge_runtime/output.h @@ -19,7 +19,6 @@ #include #include - #include "ge_runtime/davinci_model.h" #include "common/ge_types.h" @@ -34,7 +33,7 @@ class Output { bool CopyRslt(OutputData *rslt, uint32_t data_begin, uint32_t &data_index, bool support_mem_share); - bool SetDataBuf(DataBuffer &data_buf, uint32_t &data_count, size_t i, bool support_mem_share); + bool SetDataBuf(DataBuffer &data_buf, uint32_t data_begin, uint32_t &data_count, size_t i, bool support_mem_share); // Copy assignment operator and copy constructor are deleted Output &operator=(const Output &output) = delete; diff --git a/src/ge/ge_runtime/runtime_model.cc b/src/ge/ge_runtime/runtime_model.cc index 14b478f3..5573fa89 100644 --- a/src/ge/ge_runtime/runtime_model.cc +++ 
b/src/ge/ge_runtime/runtime_model.cc @@ -15,9 +15,7 @@ */ #include "ge_runtime/runtime_model.h" - #include - #include "./model_context.h" #include "./task/task.h" #include "framework/common/debug/ge_log.h" @@ -26,11 +24,11 @@ #include "common/util.h" #include "framework/common/op/op_parser_util.h" #include "graph/types.h" -#include "ge_runtime/op_info_utils.h" #include "task/task_factory.h" namespace ge { namespace model_runner { + RuntimeModel::~RuntimeModel() { GELOGI("RuntimeModel destructor start"); @@ -41,17 +39,17 @@ RuntimeModel::~RuntimeModel() { RtModelUnbindStream(); // Release all task related streams - RtStreamDestroy(); + RtStreamDestory(); // Release rtlabel resource - RtLabelDestroy(); + RtLabelDestory(); // Release rtEvent resourece - RtEventDestroy(); + RtEventDestory(); - GELOGI("Do RtModelDestroy"); + GELOGI("Do RtModelDestory"); // Release all rt_model - RtModelDestroy(); + RtModelDestory(); } bool RuntimeModel::InitStream(std::shared_ptr &davinci_model) { @@ -77,8 +75,8 @@ bool RuntimeModel::InitStream(std::shared_ptr &davinci_model) { for (uint32_t i = 0; i < davinci_model->GetStreamNum(); ++i) { rtStream_t stream = nullptr; uint32_t flag = (force_copy_streams.find(i) != force_copy_streams.end()) - ? (RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY) - : (RT_STREAM_PERSISTENT); + ? (RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY) + : (RT_STREAM_PERSISTENT); rtError_t rt_ret = rtStreamCreateWithFlags(&stream, davinci_model->GetPriority(), flag); if (rt_ret != RT_ERROR_NONE) { @@ -287,7 +285,7 @@ void RuntimeModel::RtModelUnbindStream() noexcept { } } -void RuntimeModel::RtStreamDestroy() noexcept { +void RuntimeModel::RtStreamDestory() noexcept { if (rtStreamDestroy(rt_model_stream_) != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Destroy stream for rt_model failed!"); return; @@ -301,7 +299,7 @@ void RuntimeModel::RtStreamDestroy() noexcept { } } -void RuntimeModel::RtLabelDestroy() noexcept { +void RuntimeModel::RtLabelDestory() noexcept { for (size_t i = 0; i < label_list_.size(); i++) { if (rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Destroy label failed! Index: %zu.", i); @@ -310,7 +308,7 @@ void RuntimeModel::RtLabelDestroy() noexcept { } } -void RuntimeModel::RtModelDestroy() noexcept { +void RuntimeModel::RtModelDestory() noexcept { rtError_t ret = rtModelDestroy(rt_model_handle_); if (ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret); @@ -318,7 +316,7 @@ void RuntimeModel::RtModelDestroy() noexcept { } } -void RuntimeModel::RtEventDestroy() noexcept { +void RuntimeModel::RtEventDestory() noexcept { for (size_t i = 0; i < event_list_.size(); i++) { if (rtEventDestroy(event_list_[i]) != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Destroy event failed! 
Index: %zu", i); @@ -327,57 +325,7 @@ void RuntimeModel::RtEventDestroy() noexcept { } } -bool RuntimeModel::InitDataInfo(std::shared_ptr &davinci_model) { - if (davinci_model == nullptr) { - GELOGE(PARAM_INVALID, "davinci model is null"); - return false; - } - data_info_list_ = davinci_model->GetDataInfoList(); - for (auto &data_info : data_info_list_) { - cce::ccTensorDescriptor_t input_desc = nullptr; - cce::ccTensorDescriptor_t output_desc = nullptr; - if (data_info == nullptr) { - GELOGE(PARAM_INVALID, "data info ptr is null."); - return false; - } - - if (data_info->input_tensors.empty() || data_info->output_tensors.empty()) { - GELOGE(PARAM_INVALID, "data info input tensors size %zu, output tensor size %zu.", - data_info->input_tensors.size(), data_info->output_tensors.size()); - return false; - } - - if (static_cast(data_info->input_tensors[0].format) != FORMAT_FILTER_HWCK) { - bool ret = OpInfoUtils::InitTensorDescriptor(data_info->input_tensors[0].format, - data_info->input_tensors[0].datatype, - data_info->input_tensors[0].dims, input_desc, - data_info->input_tensors[0].real_dim_cnt); - if (!ret) { - GELOGE(FAILED, "InitTensorDescriptor Fail."); - OpInfoUtils::DestroyTensorDescriptor(input_desc); - return false; - } - - input_tensor_desc_list_[data_info->name] = input_desc; - } - - if (static_cast(data_info->output_tensors[0].format) != FORMAT_FRACTAL_Z) { - bool ret = OpInfoUtils::InitTensorDescriptor(data_info->output_tensors[0].format, - data_info->output_tensors[0].datatype, - data_info->output_tensors[0].dims, output_desc, - data_info->output_tensors[0].real_dim_cnt); - if (!ret) { - GELOGE(FAILED, "InitTensorDescriptor Fail."); - OpInfoUtils::DestroyTensorDescriptor(output_desc); - return false; - } - - output_tensor_desc_list_[data_info->name] = output_desc; - } - } - - return true; -} +bool RuntimeModel::InitDataInfo(std::shared_ptr &davinci_model) { return true; } bool RuntimeModel::InitOutputInfo(std::shared_ptr &davinci_model) { if (davinci_model == nullptr) { @@ -413,46 +361,7 @@ bool RuntimeModel::CopyInputData(const InputData &input_data) { } bool RuntimeModel::CopyInputDataToModel(const std::vector &data, const std::shared_ptr &data_info) { - if (data_info == nullptr) { - GELOGE(PARAM_INVALID, "data info is empty."); - return false; - } - GELOGI("Start copy input data to model, data info: %s.", data_info->name.c_str()); - if (data.empty()) { - GELOGE(PARAM_INVALID, "data buffer is empty."); - return false; - } - - // Check size - if (data_info->input_tensors.size() != 1 || data_info->output_tensors.size() != 1) { - GELOGE(PARAM_INVALID, "Data Op has invalid input_desc_size(%zu) or output_desc_size(%zu)", - data_info->input_tensors.size(), data_info->output_tensors.size()); - return false; - } - - // Process filter weight input while online - if (OpInfoUtils::NeedTransFilter(data_info)) { - bool ret = OpInfoUtils::TransFilterData(data_info, data[data_info->index].data, data[data_info->index].length); - if (!ret) { - GELOGE(FAILED, "TransFilterData fail."); - return false; - } - return true; - } - - if (data_info->input_tensors[0].size >= data[data_info->index].length) { - GELOGE(PARAM_INVALID, "The input data size(%u) does not match model required size(%u), ret fail.", - data[data_info->index].length, data_info->input_tensors[0].size); - return false; - } - - // float to float16 - bool need_trans_flag = OpInfoUtils::IsInputTensorNeedTrans(data_info); - if (need_trans_flag) { - return CopyTransData(data, data_info); - } else { - return CopyHostData(data, data_info); 
- } + return true; } bool RuntimeModel::CopyHostData(const std::vector &data, const std::shared_ptr &data_info) const { @@ -490,56 +399,6 @@ bool RuntimeModel::CopyHostData(const std::vector &data, const std:: } bool RuntimeModel::CopyTransData(const std::vector &data, const std::shared_ptr &data_info) { - GELOGI("Start CopyTransData."); - if (data.empty()) { - GELOGE(PARAM_INVALID, "data buffer is empty."); - return false; - } - - if (data_info == nullptr) { - GELOGE(PARAM_INVALID, "data info is null."); - return false; - } - - if (data_info->output_tensors.empty()) { - GELOGE(PARAM_INVALID, "data info output tensors is empty."); - return false; - } - - const std::vector &outputs = data_info->output_addrs; - if (outputs.empty()) { - GELOGE(PARAM_INVALID, "output addrs is empty."); - return false; - } - - void *fp16_data_addr = nullptr; - uint32_t copy_size = data_info->output_tensors[0].size; - GE_MAKE_GUARD_RTMEM(fp16_data_addr); - - rtError_t rt_ret = rtMallocHost(&fp16_data_addr, copy_size); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return false; - } - - cce::ccStatus_t cc_ret = cce::ccTransTensor(input_tensor_desc_list_[data_info->name], data[data_info->index].data, - output_tensor_desc_list_[data_info->name], fp16_data_addr, copy_size); - if (cc_ret != cce::CC_STATUS_SUCCESS) { - GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); - return false; - } - void *host_data_addr = fp16_data_addr; - - GELOGI("data output tensor is not aipp tensor,call cce trans tensor."); - GELOGI("output[0]=%ld, copy_size=%u", outputs[0], copy_size); - - rt_ret = rtMemcpy(reinterpret_cast(outputs[0]), copy_size, host_data_addr, copy_size, - RT_MEMCPY_HOST_TO_DEVICE); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - return false; - } - return true; } @@ -568,8 +427,8 @@ bool RuntimeModel::InitConstantInfo(std::shared_ptr &davinci_model } if (constant->output_tensors[0].size < constant->weight_data.size()) { - GELOGE(PARAM_INVALID, "Output size:%u less than weight data size:%zu", - constant->output_tensors[0].size, constant->weight_data.size()); + GELOGE(PARAM_INVALID, "Output size:%u less than weight data size:%zu", constant->output_tensors[0].size, + constant->weight_data.size()); return false; } @@ -583,8 +442,8 @@ bool RuntimeModel::InitConstantInfo(std::shared_ptr &davinci_model /// The logic of GetShapeSize is wrong, the scaler tensor's GetShapeSize is zero /// and that of unknown shape is zero too. /// Unknown shape will not appear here, so we can use zero judge a tensor is scaler or not. - int64_t elem_num = (constant->weight_tensors[0].GetShapeSize() == 0) ? - 1 : constant->weight_tensors[0].GetShapeSize(); + int64_t elem_num = + (constant->weight_tensors[0].GetShapeSize() == 0) ? 
1 : constant->weight_tensors[0].GetShapeSize(); if (constant->weight_data.size() < sizeof(uint64_t)) { GELOGE(FAILED, "weight_data size is smaller than sizeof(uint64_t)"); return false; @@ -608,226 +467,22 @@ bool RuntimeModel::InitConstantInfo(std::shared_ptr &davinci_model return true; } -bool RuntimeModel::GetInputOutputDescInfo(bool zero_copy, - std::vector *input_desc, +bool RuntimeModel::GetInputOutputDescInfo(bool zero_copy, std::vector *input_desc, std::vector *output_desc, - std::vector *input_format, - std::vector *output_format) { - if ((data_info_list_.empty()) || (data_info_list_[0]->input_tensors.size() != 1)) { - // Maybe there is no datainput info while online - if (!zero_copy && input_format == nullptr) { - GELOGW("Data op List is null or input_desc size is not 1!"); - } else { - GELOGE(FAILED, "Data op List is null or input_desc size is not 1!"); - return false; - } - } - - bool ret = GetInputDescInfo(input_desc, input_format); - if (!ret) { - GELOGE(FAILED, "Get input desc info failed."); - return false; - } - - ret = GetOutputDescInfo(output_desc, output_format); - if (!ret) { - GELOGE(FAILED, "Get output desc info failed."); - return false; - } - - std::vector output_size_list; - std::vector output_memory_size_list; - for (const auto &output_op : output_info_list_) { - if (!OpInfoUtils::GetOutputSize(output_op, output_size_list, output_memory_size_list)) { - GELOGE(FAILED, "GetOutputSize fail."); - return false; - } - } - - if (output_desc->size() != output_size_list.size()) { - GELOGE(INTERNAL_ERROR, "output_desc size[%zu] not equal output_size_list_[%zu] size!", output_desc->size(), - output_size_list.size()); - return false; - } - - const std::vector &size_list = (zero_copy) ? (output_memory_size_list) : (output_size_list); - for (size_t i = 0; i < output_size_list.size(); ++i) { - output_desc->at(i).size = size_list[i]; - } - + std::vector *input_format, std::vector *output_format) { return true; } -bool RuntimeModel::GetInputDescInfo(std::vector *input_desc, - std::vector *formats) { - if (input_desc == nullptr) { - GELOGE(PARAM_INVALID, "Input desc is null."); - return false; - } - - // Analyze input dimension information - for (size_t index = 0; index < data_info_list_.size(); ++index) { - if (data_info_list_[index]->input_tensors.empty()) { - GELOGE(INTERNAL_ERROR, "data info list index %zu input tensors is empty.", index); - return false; - } - InputOutputDescInfo input; - uint32_t n, c, h, w; - Format format = static_cast(data_info_list_[index]->input_tensors[0].format); - if (format == FORMAT_NHWC) { - n = kNhwcDimN; - c = kNhwcDimC; - h = kNhwcDimH; - w = kNhwcDimW; - } else { - n = kNchwDimN; - c = kNchwDimC; - h = kNchwDimH; - w = kNchwDimW; - } - - if (data_info_list_[index]->input_tensors[0].dims.size() == static_cast(domi::NORMAL_TENSOR_SIZE)) { - input.shape_info.num = data_info_list_[index]->input_tensors[0].GetDim(n); - input.shape_info.height = data_info_list_[index]->input_tensors[0].GetDim(h); - input.shape_info.width = data_info_list_[index]->input_tensors[0].GetDim(w); - input.shape_info.channel = data_info_list_[index]->input_tensors[0].GetDim(c); - } - // Original network dimension - for (size_t k = 0; k < data_info_list_[index]->input_tensors[0].dims.size(); ++k) { - input.shape_info.dims.push_back(data_info_list_[index]->input_tensors[0].GetDim(k)); - } - - input.data_type = data_info_list_[index]->input_tensors[0].datatype; - input.name = data_info_list_[index]->name; - input.size = data_info_list_[index]->input_tensors[0].size; - - 
-    input_desc->push_back(input);
-    if (formats != nullptr) {
-      formats->push_back(format);
-    }
-  }
-
+bool RuntimeModel::GetInputDescInfo(std::vector<InputOutputDescInfo> *input_desc, std::vector<uint32_t> *formats) {
   return true;
 }
 
-bool RuntimeModel::GetOutputDescInfo(std::vector<InputOutputDescInfo> *output_desc,
-                                     std::vector<uint32_t> *formats) {
-  if (output_desc == nullptr) {
-    GELOGE(PARAM_INVALID, "Output desc is null.");
-    return false;
-  }
-
-  // Analyze output dimension information
-  for (size_t i = 0; i < output_info_list_.size(); ++i) {
-    const auto &op_info = output_info_list_[i];
-    if (op_info == nullptr) {
-      GELOGE(PARAM_INVALID, "Op info at %zu is null.", i);
-      return false;
-    }
-    auto out_size = static_cast<uint32_t>(op_info->output_tensors.size());
-    for (uint32_t index = 0; index < out_size; ++index) {
-      bool is_output = op_info->output_tensors[index].is_output;
-      if (!is_output) {
-        continue;
-      }
-
-      std::string output_name;
-      InputOutputDescInfo output;
-      uint32_t format_result;
-      CreateOutput(index, *op_info, &output, &format_result);
-
-      std::vector<std::string> src_name = op_info->src_name;
-      std::vector<int64_t> src_index = op_info->src_index;
-      if (op_info->type == kNetOutPut) {
-        GELOGI("Op info %s index %zu is NETOUTPUT.", op_info->name.c_str(), i);
-        if (index >= src_name.size() || index >= src_index.size()) {
-          GELOGE(INTERNAL_ERROR, "Construct output_name failed.");
-          return false;
-        }
-        output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" +
-                      std::to_string(src_index[index]);
-      } else {
-        GELOGI("Op info %s index %zu is not NETOUTPUT, type: %s.", op_info->name.c_str(), i, op_info->type.c_str());
-        output_name = std::string("output_") + std::to_string(i) + "_" + op_info->name + "_" + std::to_string(index);
-      }
-      output.name = output_name;
-
-      output_desc->push_back(output);
-      if (formats != nullptr) {
-        formats->push_back(format_result);
-      }
-    }
-  }
+bool RuntimeModel::GetOutputDescInfo(std::vector<InputOutputDescInfo> *output_desc, std::vector<uint32_t> *formats) {
   return true;
 }
 
 void RuntimeModel::CreateOutput(uint32_t index, const OpInfo &op_info, InputOutputDescInfo *output,
-                                uint32_t *format_result) {
-  if (output == nullptr) {
-    GELOGE(PARAM_INVALID, "Output desc is null.");
-    return;
-  }
-
-  int64_t dims[] = {1, 1, 1, 1};
-  if (index >= op_info.output_tensors.size()) {
-    GELOGE(PARAM_INVALID, "op_info %s output_tensors size %zu, but index %u.", op_info.name.c_str(),
-           op_info.output_tensors.size(), index);
-    return;
-  }
-
-  TensorInfo output_tensor = op_info.output_tensors[index];
-  Format format = static_cast<Format>(output_tensor.format);
-  if (format_result != nullptr) {
-    *format_result = format;
-  }
-
-  if (format == FORMAT_ND) {  // For ND tensor
-    for (size_t i = 0; i < output_tensor.dims.size() && i < (sizeof(dims) / sizeof(dims[0])); ++i) {
-      dims[i] = static_cast<int64_t>(output_tensor.GetDim(i));
-    }
-  } else if (format == FORMAT_NHWC) {  // For FORMAT_NHWC
-    dims[0] = output_tensor.GetDim(kNhwcDimN);
-    dims[1] = output_tensor.GetDim(kNhwcDimC);
-    dims[2] = output_tensor.GetDim(kNhwcDimH);
-    dims[3] = output_tensor.GetDim(kNhwcDimW);
-  } else {  // For FORMAT_NCHW
-    dims[0] = output_tensor.GetDim(kNchwDimN);
-    dims[1] = output_tensor.GetDim(kNchwDimC);
-    dims[2] = output_tensor.GetDim(kNchwDimH);
-    dims[3] = output_tensor.GetDim(kNchwDimW);
-  }
-
-  output->shape_info.num = dims[0];      // 0: First dim
-  output->shape_info.channel = dims[1];  // 1: Second dim
-  output->shape_info.height = dims[2];   // 2: Third dim
-  output->shape_info.width = dims[3];    // 3: Forth dim
-
-  if (index >= op_info.input_tensors.size()) {
-    GELOGE(PARAM_INVALID, "input tensors size %zu less than index %u.", op_info.input_tensors.size(), index);
-    return;
-  }
-
-  if (op_info.input_tensors[index].format == FORMAT_FRACTAL_Z) {  // FraczToHWCK
-    int64_t k = output_tensor.GetDim(0);  // 0: First dim
-    int64_t c = output_tensor.GetDim(1);  // 1: Second dim
-    int64_t h = output_tensor.GetDim(2);  // 2: Third dim
-    int64_t w = output_tensor.GetDim(3);  // 3: Forth dim
-    output->shape_info.dims.push_back(h);
-    output->shape_info.dims.push_back(w);
-    output->shape_info.dims.push_back(c);
-    output->shape_info.dims.push_back(k);
-
-    if (format_result != nullptr) {
-      *format_result = FORMAT_HWCN;
-    }
-  } else {
-    for (size_t j = 0; j < output_tensor.dims.size(); ++j) {
-      output->shape_info.dims.push_back(output_tensor.GetDim(j));
-    }
-  }
-
-  output->data_type = output_tensor.datatype;
-}
+                                uint32_t *format_result) {}
 
 const std::vector<uint32_t> &RuntimeModel::GetTaskIdList() const { return task_id_list_; }
diff --git a/src/ge/ge_runtime/runtime_model.h b/src/ge/ge_runtime/runtime_model.h
index e304cafb..e8ff4057 100644
--- a/src/ge/ge_runtime/runtime_model.h
+++ b/src/ge/ge_runtime/runtime_model.h
@@ -16,13 +16,10 @@
 #ifndef GE_GE_RUNTIME_RUNTIME_MODEL_H_
 #define GE_GE_RUNTIME_RUNTIME_MODEL_H_
-
 #include <map>
 #include <memory>
 #include <string>
 #include <vector>
-
-#include "cce/dnn_base_def.hpp"
 #include "ge_runtime/davinci_model.h"
 #include "common/ge_types.h"
 #include "runtime/base.h"
@@ -41,10 +38,8 @@ class RuntimeModel {
   const std::vector<uint32_t> &GetTaskIdList() const;
   bool Run();
   bool CopyInputData(const InputData &input_data);
-  bool GetInputOutputDescInfo(bool zero_copy,
-                              std::vector<InputOutputDescInfo> *input_desc,
-                              std::vector<InputOutputDescInfo> *output_desc,
-                              std::vector<uint32_t> *input_format,
+  bool GetInputOutputDescInfo(bool zero_copy, std::vector<InputOutputDescInfo> *input_desc,
+                              std::vector<InputOutputDescInfo> *output_desc, std::vector<uint32_t> *input_format,
                               std::vector<uint32_t> *output_format);
 
  private:
@@ -58,10 +53,10 @@ class RuntimeModel {
   bool InitOutputInfo(std::shared_ptr<DavinciModel> &davinci_model);
   bool InitConstantInfo(std::shared_ptr<DavinciModel> &davinci_model);
   void RtModelUnbindStream() noexcept;
-  void RtStreamDestroy() noexcept;
-  void RtModelDestroy() noexcept;
-  void RtLabelDestroy() noexcept;
-  void RtEventDestroy() noexcept;
+  void RtStreamDestory() noexcept;
+  void RtModelDestory() noexcept;
+  void RtLabelDestory() noexcept;
+  void RtEventDestory() noexcept;
   bool CopyInputDataToModel(const std::vector<DataBuffer> &data, const std::shared_ptr<OpInfo> &data_info);
   bool CopyHostData(const std::vector<DataBuffer> &data, const std::shared_ptr<OpInfo> &data_info) const;
   bool CopyTransData(const std::vector<DataBuffer> &data, const std::shared_ptr<OpInfo> &data_info);
@@ -80,8 +75,6 @@ class RuntimeModel {
   std::vector<std::shared_ptr<OpInfo>> data_info_list_{};
   std::vector<std::shared_ptr<OpInfo>> output_info_list_{};
   std::vector<std::shared_ptr<OpInfo>> constant_info_list_{};
-  std::map<std::string, cce::ccTensorDescriptor_t> input_tensor_desc_list_{};
-  std::map<std::string, cce::ccTensorDescriptor_t> output_tensor_desc_list_{};
   std::vector<uint32_t> task_id_list_{};
 };
diff --git a/src/ge/ge_runtime/task/aicpu_task.cc b/src/ge/ge_runtime/task/aicpu_task.cc
index cfb1cc77..07f287e1 100644
--- a/src/ge/ge_runtime/task/aicpu_task.cc
+++ b/src/ge/ge_runtime/task/aicpu_task.cc
@@ -15,11 +15,9 @@
  */
 
 #include "ge_runtime/task/aicpu_task.h"
-
 #include <vector>
-
-#include "aicpu/common/aicpu_task_struct.h"
 #include "ge_runtime/task/task_factory.h"
+#include "aicpu/common/aicpu_task_struct.h"
 
 namespace ge {
 namespace model_runner {
@@ -51,7 +49,7 @@ bool AicpuTask::Distribute() {
   constexpr uint32_t io_addr_offset = sizeof(aicpu::AicpuParamHead);
   uint32_t node_def_addr_offset = io_addr_offset + io_addrs_size;
   uint32_t args_size =
-      sizeof(aicpu::AicpuParamHead) + io_addrs_size + static_cast<uint32_t>(task_info_->node_def().size());
+    sizeof(aicpu::AicpuParamHead) + io_addrs_size + static_cast<uint32_t>(task_info_->node_def().size());
   aicpu::AicpuParamHead aicpu_param_head = {args_size, io_addrs_num};
 
   // Malloc device memory for args
@@ -113,6 +111,6 @@ void AicpuTask::ReleaseRtMem(void **ptr) noexcept {
   *ptr = nullptr;
 }
 
-REGISTER_TASK(TaskInfoType::kAiCpu, AicpuTask, AicpuTaskInfo);
+REGISTER_TASK(TaskInfoType::AICPU, AicpuTask, AicpuTaskInfo);
 }  // namespace model_runner
 }  // namespace ge
diff --git a/src/ge/ge_runtime/task/aicpu_task.h b/src/ge/ge_runtime/task/aicpu_task.h
index ddb961b1..f5cdc617 100644
--- a/src/ge/ge_runtime/task/aicpu_task.h
+++ b/src/ge/ge_runtime/task/aicpu_task.h
@@ -18,7 +18,6 @@
 #define GE_GE_RUNTIME_TASK_AICPU_TASK_H_
 
 #include <memory>
-
 #include "ge_runtime/task/task.h"
 
 namespace ge {
diff --git a/src/ge/ge_runtime/task/cce_task.cc b/src/ge/ge_runtime/task/cce_task.cc
index acfc83b8..e2fef432 100644
--- a/src/ge/ge_runtime/task/cce_task.cc
+++ b/src/ge/ge_runtime/task/cce_task.cc
@@ -15,7 +15,6 @@
  */
 
 #include "ge_runtime/task/cce_task.h"
-
 #include "ge_runtime/task/task_factory.h"
 
 namespace ge {
@@ -101,15 +100,10 @@ bool CceTask::Distribute() {
 
   // Modify flowtable addr in args
   auto args = const_cast<char *>(task_info_->args().data());
-  if (task_info_->args_offset().size() < sizeof(uint16_t)) {
-    GELOGE(FAILED, "size of args_offset is smaller than sizeof(uint16_t).");
-    return false;
-  }
   auto task_offset = reinterpret_cast<uint16_t *>(const_cast<char *>(task_info_->args_offset().data()));
 
   if (task_info_->args().size() < (task_offset[0] + sizeof(uint64_t))) {
-    GELOGE(FAILED,
-           "(context.args_offset().data()))[0]:%u + sizeof(uint64_t):%zu > kernelDef.args().size():%zu",
+    GELOGE(FAILED, "(context.args_offset().data()))[0]:%u + sizeof(uint64_t):%zu > kernelDef.args().size():%zu",
            static_cast<uint32_t>(task_offset[0]), sizeof(uint64_t), task_info_->args().size());
     return false;
   }
@@ -139,8 +133,7 @@ bool CceTask::Distribute() {
       return false;
     }
 
-    rt_ret = rtMemcpy(sm_desc_, task_info_->sm_desc().size(),
-                      task_info_->sm_desc().data(),
+    rt_ret = rtMemcpy(sm_desc_, task_info_->sm_desc().size(), task_info_->sm_desc().data(),
                       task_info_->sm_desc().size(), RT_MEMCPY_HOST_TO_DEVICE);
     if (rt_ret != RT_ERROR_NONE) {
       GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
@@ -149,12 +142,8 @@ bool CceTask::Distribute() {
   }
 
   // Kernel launch
-  rt_ret = rtKernelLaunch(stub_func_,
-                          task_info_->block_dim(),
-                          args_,
-                          task_info_->args_size(),
-                          static_cast<rtSmDesc_t *>(sm_desc_),
-                          stream_);
+  rt_ret = rtKernelLaunch(stub_func_, task_info_->block_dim(), args_, task_info_->args_size(),
+                          static_cast<rtSmDesc_t *>(sm_desc_), stream_);
   if (rt_ret != RT_ERROR_NONE) {
     GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
     return false;
@@ -162,6 +151,6 @@ bool CceTask::Distribute() {
   return true;
 }
 
-REGISTER_TASK(TaskInfoType::kCce, CceTask, CceTaskInfo);
+REGISTER_TASK(TaskInfoType::CCE, CceTask, CceTaskInfo);
 }  // namespace model_runner
 }  // namespace ge
diff --git a/src/ge/ge_runtime/task/cce_task.h b/src/ge/ge_runtime/task/cce_task.h
index e5c936fc..a6e38f66 100644
--- a/src/ge/ge_runtime/task/cce_task.h
+++ b/src/ge/ge_runtime/task/cce_task.h
@@ -18,7 +18,6 @@
 #define GE_GE_RUNTIME_TASK_CCE_TASK_H_
 
 #include <memory>
-
 #include "ge_runtime/task/task.h"
 
 namespace ge {
diff --git a/src/ge/ge_runtime/task/event_record_task.cc b/src/ge/ge_runtime/task/event_record_task.cc
index f5a980c0..46ac7a1b 100644
--- a/src/ge/ge_runtime/task/event_record_task.cc
+++ b/src/ge/ge_runtime/task/event_record_task.cc
@@ -15,7 +15,6 @@
  */
 
 #include "ge_runtime/task/event_record_task.h"
-
"ge_runtime/task/task_factory.h" namespace ge { @@ -34,8 +33,8 @@ EventRecordTask::EventRecordTask(const ModelContext &model_context, uint32_t stream_id = task_info->stream_id(); uint32_t event_id = task_info->event_id(); if (stream_id >= stream_list.size() || event_id >= event_list.size()) { - GELOGW("stream_list size:%zu, stream_id:%u, event_list size:%zu, event_id:%u", stream_list.size(), - stream_id, event_list.size(), event_id); + GELOGW("stream_list size:%zu, stream_id:%u, event_list size:%zu, event_id:%u", stream_list.size(), stream_id, + event_list.size(), event_id); } stream_ = stream_list[stream_id]; event_ = event_list[event_id]; @@ -53,6 +52,6 @@ bool EventRecordTask::Distribute() { return true; } -REGISTER_TASK(TaskInfoType::kEventRecord, EventRecordTask, EventRecordTaskInfo); +REGISTER_TASK(TaskInfoType::EVENT_RECORD, EventRecordTask, EventRecordTaskInfo); } // namespace model_runner } // namespace ge diff --git a/src/ge/ge_runtime/task/event_record_task.h b/src/ge/ge_runtime/task/event_record_task.h index 7c0c1042..7c1d4f80 100644 --- a/src/ge/ge_runtime/task/event_record_task.h +++ b/src/ge/ge_runtime/task/event_record_task.h @@ -18,7 +18,6 @@ #define GE_GE_RUNTIME_TASK_EVENT_RECORD_TASK_H_ #include - #include "ge_runtime/task/task.h" namespace ge { @@ -34,7 +33,7 @@ class EventRecordTask : public TaskRepeater { private: std::shared_ptr task_info_; rtStream_t stream_; - rtEvent_t event_; + rtEvent_t event_; }; } // namespace model_runner } // namespace ge diff --git a/src/ge/ge_runtime/task/event_wait_task.cc b/src/ge/ge_runtime/task/event_wait_task.cc index db00bc4c..e4cf986f 100644 --- a/src/ge/ge_runtime/task/event_wait_task.cc +++ b/src/ge/ge_runtime/task/event_wait_task.cc @@ -15,7 +15,6 @@ */ #include "ge_runtime/task/event_wait_task.h" - #include "ge_runtime/task/task_factory.h" namespace ge { @@ -58,6 +57,6 @@ bool EventWaitTask::Distribute() { return true; } -REGISTER_TASK(TaskInfoType::kEventWait, EventWaitTask, EventWaitTaskInfo); +REGISTER_TASK(TaskInfoType::EVENT_WAIT, EventWaitTask, EventWaitTaskInfo); } // namespace model_runner } // namespace ge diff --git a/src/ge/ge_runtime/task/event_wait_task.h b/src/ge/ge_runtime/task/event_wait_task.h index 886b733a..9104bbf8 100644 --- a/src/ge/ge_runtime/task/event_wait_task.h +++ b/src/ge/ge_runtime/task/event_wait_task.h @@ -18,7 +18,6 @@ #define GE_GE_RUNTIME_TASK_EVENT_WAIT_TASK_H_ #include - #include "ge_runtime/task/task.h" namespace ge { @@ -34,7 +33,7 @@ class EventWaitTask : public TaskRepeater { private: std::shared_ptr task_info_; rtStream_t stream_; - rtEvent_t event_; + rtEvent_t event_; }; } // namespace model_runner } // namespace ge diff --git a/src/ge/ge_runtime/task/hccl_task.cc b/src/ge/ge_runtime/task/hccl_task.cc old mode 100755 new mode 100644 index 1a5d0a20..0794c0e9 --- a/src/ge/ge_runtime/task/hccl_task.cc +++ b/src/ge/ge_runtime/task/hccl_task.cc @@ -15,7 +15,6 @@ */ #include "ge_runtime/task/hccl_task.h" - #include "ge_runtime/task/task_factory.h" #include "common/opskernel/ops_kernel_info_store.h" #include "common/opskernel/ge_task_info.h" @@ -23,9 +22,15 @@ namespace ge { namespace model_runner { HcclTask::HcclTask(const ModelContext &model_context, const std::shared_ptr &task_info) - : TaskRepeater(model_context, task_info), task_info_(task_info), stream_(nullptr), - rt_model_handle_(nullptr), priority_(0), slave_stream_list_(), hcom_bind_model_(nullptr), - hcom_unbind_model_(nullptr), hcom_distribute_task_(nullptr) { + : TaskRepeater(model_context, task_info), + task_info_(task_info), + 
+      stream_(nullptr),
+      rt_model_handle_(nullptr),
+      priority_(0),
+      slave_stream_list_(),
+      hcom_bind_model_(nullptr),
+      hcom_unbind_model_(nullptr),
+      hcom_distribute_task_(nullptr) {
   if (task_info_ == nullptr) {
     GELOGW("task_info_ is null!");
   }
@@ -40,7 +45,7 @@ HcclTask::HcclTask(const ModelContext &model_context, const std::shared_ptr<Hccl
   void *ops_kernel_store = task_info_->ops_kernel_store();
-  OpsKernelInfoStore* ops_kernel_info_store = reinterpret_cast<OpsKernelInfoStore *> (ops_kernel_store);
+  OpsKernelInfoStore *ops_kernel_info_store = reinterpret_cast<OpsKernelInfoStore *>(ops_kernel_store);
   if (ops_kernel_store == nullptr) {
     GELOGE(PARAM_INVALID, "No hcom distribute function ptr and no ops kernel store.");
     return false;
   }
@@ -148,6 +153,6 @@ bool HcclTask::Distribute() {
   return true;
 }
 
-REGISTER_TASK(TaskInfoType::kHccl, HcclTask, HcclTaskInfo);
+REGISTER_TASK(TaskInfoType::HCCL, HcclTask, HcclTaskInfo);
 }  // namespace model_runner
 }  // namespace ge
diff --git a/src/ge/ge_runtime/task/hccl_task.h b/src/ge/ge_runtime/task/hccl_task.h
index 67788f36..1649a8bd 100644
--- a/src/ge/ge_runtime/task/hccl_task.h
+++ b/src/ge/ge_runtime/task/hccl_task.h
@@ -20,7 +20,6 @@
 #include
 #include
 #include
-
 #include "ge_runtime/task/task.h"
 
 namespace ge {
diff --git a/src/ge/ge_runtime/task/memcpy_async_task.cc b/src/ge/ge_runtime/task/memcpy_async_task.cc
index e9610978..8c09b379 100644
--- a/src/ge/ge_runtime/task/memcpy_async_task.cc
+++ b/src/ge/ge_runtime/task/memcpy_async_task.cc
@@ -15,7 +15,6 @@
  */
 
 #include "ge_runtime/task/memcpy_async_task.h"
-
 #include "ge_runtime/task/task_factory.h"
 
 namespace ge {
@@ -41,8 +40,7 @@ MemcpyAsyncTask::~MemcpyAsyncTask() {}
 bool MemcpyAsyncTask::Distribute() {
   GELOGI("MemcpyAsyncTask Distribute start.");
   GELOGI("dst_max:%lu, count:%lu, kind:%u.", task_info_->dst_max(), task_info_->count(), task_info_->kind());
-  rtError_t rt_ret = rtMemcpyAsync(task_info_->dst(), task_info_->dst_max(),
-                                   task_info_->src(), task_info_->count(),
+  rtError_t rt_ret = rtMemcpyAsync(task_info_->dst(), task_info_->dst_max(), task_info_->src(), task_info_->count(),
                                    static_cast<rtMemcpyKind_t>(task_info_->kind()), stream_);
   if (rt_ret != RT_ERROR_NONE) {
     GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
@@ -52,6 +50,6 @@ bool MemcpyAsyncTask::Distribute() {
   return true;
 }
 
-REGISTER_TASK(TaskInfoType::kMemcpyAsync, MemcpyAsyncTask, MemcpyAsyncTaskInfo);
+REGISTER_TASK(TaskInfoType::MEMCPY_ASYNC, MemcpyAsyncTask, MemcpyAsyncTaskInfo);
 }  // namespace model_runner
 }  // namespace ge
diff --git a/src/ge/ge_runtime/task/memcpy_async_task.h b/src/ge/ge_runtime/task/memcpy_async_task.h
index fd2e42ae..949aacc3 100644
--- a/src/ge/ge_runtime/task/memcpy_async_task.h
+++ b/src/ge/ge_runtime/task/memcpy_async_task.h
@@ -18,7 +18,6 @@
 #define GE_GE_RUNTIME_TASK_MEMCPY_ASYNC_TASK_H_
 
 #include <memory>
-
 #include "ge_runtime/task/task.h"
 
 namespace ge {
diff --git a/src/ge/ge_runtime/task/profiler_task.cc b/src/ge/ge_runtime/task/profiler_task.cc
index 95fddd67..535c4f36 100644
--- a/src/ge/ge_runtime/task/profiler_task.cc
+++ b/src/ge/ge_runtime/task/profiler_task.cc
@@ -15,7 +15,6 @@
  */
 
 #include "ge_runtime/task/profiler_task.h"
-
 #include "ge_runtime/task/task_factory.h"
 
 namespace ge {
@@ -48,6 +47,7 @@ bool ProfilerTask::Distribute() {
   return true;
 }
 
-REGISTER_TASK(TaskInfoType::kProfilerTrace, ProfilerTask, ProfilerTraceTaskInfo);
+REGISTER_TASK(TaskInfoType::PROFILER_TRACE, ProfilerTask, ProfilerTraceTaskInfo);
+
 }  // namespace model_runner
 }  // namespace ge
diff --git a/src/ge/ge_runtime/task/profiler_task.h b/src/ge/ge_runtime/task/profiler_task.h
index ce814a6d..dcdaa987 100644
--- a/src/ge/ge_runtime/task/profiler_task.h
+++ b/src/ge/ge_runtime/task/profiler_task.h
@@ -18,7 +18,6 @@
 #define GE_GE_RUNTIME_TASK_PROFILER_TASK_H_
 
 #include <memory>
-
 #include "ge_runtime/task/task.h"
 
 namespace ge {
diff --git a/src/ge/ge_runtime/task/stream_active_task.cc b/src/ge/ge_runtime/task/stream_active_task.cc
index 2e91988d..d58b8942 100644
--- a/src/ge/ge_runtime/task/stream_active_task.cc
+++ b/src/ge/ge_runtime/task/stream_active_task.cc
@@ -53,6 +53,6 @@ bool StreamActiveTask::Distribute() {
   return true;
 }
 
-REGISTER_TASK(TaskInfoType::kStreamActive, StreamActiveTask, StreamActiveTaskInfo);
+REGISTER_TASK(TaskInfoType::STREAM_ACTIVE, StreamActiveTask, StreamActiveTaskInfo);
 }  // namespace model_runner
 }  // namespace ge
diff --git a/src/ge/ge_runtime/task/stream_active_task.h b/src/ge/ge_runtime/task/stream_active_task.h
index 342fa78f..9f6cc96d 100644
--- a/src/ge/ge_runtime/task/stream_active_task.h
+++ b/src/ge/ge_runtime/task/stream_active_task.h
@@ -18,7 +18,6 @@
 #define GE_GE_RUNTIME_TASK_STREAM_ACTIVE_TASK_H_
 
 #include <memory>
-
 #include "ge_runtime/task/task.h"
 
 namespace ge {
diff --git a/src/ge/ge_runtime/task/stream_switch_task.cc b/src/ge/ge_runtime/task/stream_switch_task.cc
index 1df3834a..afbdba18 100644
--- a/src/ge/ge_runtime/task/stream_switch_task.cc
+++ b/src/ge/ge_runtime/task/stream_switch_task.cc
@@ -15,7 +15,6 @@
  */
 
 #include "ge_runtime/task/stream_switch_task.h"
-
 #include "ge_runtime/task/task_factory.h"
 
 namespace ge {
@@ -73,6 +72,6 @@ bool StreamSwitchTask::Distribute() {
   return true;
 }
 
-REGISTER_TASK(TaskInfoType::kStreamSwitch, StreamSwitchTask, StreamSwitchTaskInfo);
+REGISTER_TASK(TaskInfoType::STREAM_SWITCH, StreamSwitchTask, StreamSwitchTaskInfo);
 }  // namespace model_runner
 }  // namespace ge
diff --git a/src/ge/ge_runtime/task/stream_switch_task.h b/src/ge/ge_runtime/task/stream_switch_task.h
index fce56e47..81c12507 100644
--- a/src/ge/ge_runtime/task/stream_switch_task.h
+++ b/src/ge/ge_runtime/task/stream_switch_task.h
@@ -19,7 +19,6 @@
 
 #include <memory>
 #include <vector>
-
 #include "ge_runtime/task/task.h"
 
 namespace ge {
diff --git a/src/ge/ge_runtime/task/task.h b/src/ge/ge_runtime/task/task.h
index 431706d1..7c748a7d 100644
--- a/src/ge/ge_runtime/task/task.h
+++ b/src/ge/ge_runtime/task/task.h
@@ -19,7 +19,6 @@
 
 #include <memory>
 #include <vector>
-
 #include "runtime/rt_model.h"
 #include "ge_runtime/model_context.h"
 #include "ge_runtime/task_info.h"
diff --git a/src/ge/ge_runtime/task/task_factory.h b/src/ge/ge_runtime/task/task_factory.h
index 82743257..670d1fef 100644
--- a/src/ge/ge_runtime/task/task_factory.h
+++ b/src/ge/ge_runtime/task/task_factory.h
@@ -21,7 +21,6 @@
 #include
 #include
 #include
-
 #include "common/ge_inner_error_codes.h"
 #include "framework/common/debug/ge_log.h"
 #include "ge_runtime/task_info.h"
@@ -76,13 +75,12 @@ class TaskFactory {
   };
 };
 
-#define REGISTER_TASK(type, task_clazz, task_info_clazz)                                                          \
-  TaskFactory::Register g_##task_clazz##_register(                                                                \
-    type,                                                                                                         \
-    [](const ModelContext &model_context, const std::shared_ptr<TaskInfo> &task_info) -> std::shared_ptr<Task> {  \
-      std::shared_ptr<task_info_clazz> concrete_task_info = std::static_pointer_cast<task_info_clazz>(task_info); \
-      return std::make_shared<task_clazz>(model_context, concrete_task_info);                                     \
-    });
+#define REGISTER_TASK(type, task_clazz, task_info_clazz)                                                              \
+  TaskFactory::Register g_##task_clazz##_register(                                                                    \
+    type, [](const ModelContext &model_context, const std::shared_ptr<TaskInfo> &task_info) -> std::shared_ptr<Task> { \
+      std::shared_ptr<task_info_clazz> concrete_task_info = std::static_pointer_cast<task_info_clazz>(task_info);     \
+      return std::make_shared<task_clazz>(model_context, concrete_task_info);                                         \
+    });
 }  // namespace model_runner
 }  // namespace ge
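Note: the REGISTER_TASK macro just reformatted above is easiest to read through a concrete instantiation. As an illustrative sketch only (not part of this change), REGISTER_TASK(TaskInfoType::AICPU, AicpuTask, AicpuTaskInfo) expands to roughly:

  // Sketch of the macro expansion for the AICPU task registration.
  TaskFactory::Register g_AicpuTask_register(
    TaskInfoType::AICPU,
    [](const ModelContext &model_context, const std::shared_ptr<TaskInfo> &task_info) -> std::shared_ptr<Task> {
      std::shared_ptr<AicpuTaskInfo> concrete_task_info = std::static_pointer_cast<AicpuTaskInfo>(task_info);
      return std::make_shared<AicpuTask>(model_context, concrete_task_info);
    });

i.e. a file-scope Register object whose constructor presumably records the creator lambda under the given task type, so TaskFactory can later build an AicpuTask from a generic TaskInfo at model-load time.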
diff --git a/src/ge/ge_runtime/task/tbe_task.cc b/src/ge/ge_runtime/task/tbe_task.cc
index 36be96b3..88279f1e 100644
--- a/src/ge/ge_runtime/task/tbe_task.cc
+++ b/src/ge/ge_runtime/task/tbe_task.cc
@@ -15,9 +15,7 @@
  */
 
 #include "ge_runtime/task/tbe_task.h"
-
 #include <vector>
-
 #include "ge_runtime/task/task_factory.h"
 
 namespace ge {
@@ -107,6 +105,6 @@ bool TbeTask::Distribute() {
   return true;
 }
 
-REGISTER_TASK(TaskInfoType::kTbe, TbeTask, TbeTaskInfo);
+REGISTER_TASK(TaskInfoType::TBE, TbeTask, TbeTaskInfo);
 }  // namespace model_runner
 }  // namespace ge
diff --git a/src/ge/ge_runtime/task/tbe_task.h b/src/ge/ge_runtime/task/tbe_task.h
index f9ca56bb..994ba5e2 100644
--- a/src/ge/ge_runtime/task/tbe_task.h
+++ b/src/ge/ge_runtime/task/tbe_task.h
@@ -18,7 +18,6 @@
 #define GE_GE_RUNTIME_TASK_TBE_TASK_H_
 
 #include <memory>
-
 #include "ge_runtime/task/task.h"
 
 namespace ge {
diff --git a/src/ge/generator/ge_generator.cc b/src/ge/generator/ge_generator.cc
index 2d3814d5..728bc424 100644
--- a/src/ge/generator/ge_generator.cc
+++ b/src/ge/generator/ge_generator.cc
@@ -15,7 +15,6 @@
  */
 
 #include "generator/ge_generator.h"
-
 #include "common/ge/ge_util.h"
 #include "common/ge/plugin_manager.h"
 #include "common/helper/model_helper.h"
@@ -29,9 +28,9 @@
 #include "graph/utils/graph_utils.h"
 #include "model/ge_model.h"
 
+using std::map;
 using std::string;
 using std::vector;
-using std::map;
 
 namespace {
 const char *const kAttrOpType = "op_type";
@@ -223,15 +222,15 @@ Status GeGenerator::GenerateOfflineModel(const Graph &graph, const string &file_
   return SUCCESS;
 }
 
-///
-/// @ingroup ge
-/// @brief Compiling a single operator into an offline model
-/// @param [in] OpDescPtr &op_desc: Operator description info that needs to be compiled into an offline model file
-/// @param [in] vector<GeTensor> &inputs: Operator input data description information.
-/// @param [in] vector<GeTensor> &outputs: Operator output data description information.
-/// @param [in] const string &model_file_name: Offline model filename.
-/// @return SUCCESS handle successfully / others handle failed
-///
+/**
+ * @ingroup ge
+ * @brief Compiling a single operator into an offline model
+ * @param [in] OpDescPtr &op_desc: Operator description info that needs to be compiled into an offline model file
+ * @param [in] vector<GeTensor> &inputs: Operator input data description information.
+ * @param [in] vector<GeTensor> &outputs: Operator output data description information.
+ * @param [in] const string &model_file_name: Offline model filename.
+ * @return SUCCESS handle successfully / others handle failed
+ */
 Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
                                        const vector<GeTensor> &outputs, const string &model_file_name) {
   GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID);
@@ -244,9 +243,6 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor
 
-  map<string, GeAttrValue> op_attrs = op_desc->GetAllAttrs();
-
   // 1. Create ComputeGraph.
   string name = ge::CurrentTimeInStr() + "_" + model_file_name;
   ge::ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(name);
@@ -294,6 +290,7 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor
   GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, graph_id, ge_models));
 
   if (!ge_models.empty()) {
+    map<string, GeAttrValue> op_attrs = op_desc->GetAllAttrs();
     GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_models[0], op_desc->GetType(), op_attrs, inputs, outputs));
   }
 
@@ -330,8 +327,8 @@ Status GeGenerator::Impl::SaveModel(const string &file_name_prefix, vector<GeMod
 Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GraphId &graph_id,
                                      vector<GeModelPtr> &ge_models) {
   static GraphId id = 0;
-
-  Status ret = graph_manager_.AddGraph(id, graph);
+  const std::map<std::string, std::string> options;
+  Status ret = graph_manager_.AddGraph(id, graph, options);
   if (ret != SUCCESS) {
     GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "graphManager AddGraph failed, id: %u", id);
     graph_manager_.Finalize();
diff --git a/src/ge/generator/generator_api.cc b/src/ge/generator/generator_api.cc
index 44c85535..3f92f1a2 100644
--- a/src/ge/generator/generator_api.cc
+++ b/src/ge/generator/generator_api.cc
@@ -15,7 +15,6 @@
  */
 
 #include "generator/generator_api.h"
-
 #include "common/ge/ge_util.h"
 #include "common/util.h"
 #include "framework/common/debug/ge_log.h"
@@ -99,18 +98,18 @@ class OpAttr {
   std::map<std::string, ge::GeAttrValue> attrs_;
 };
 
-///
-/// @ingroup ge
-/// @brief Generate offline model for the op.
-/// @param [in] op_type: type name of the op.
-/// @param [in] in_tensor: input description array (created by OpTensorCreate).
-/// @param [in] in_num: number of in_tensor.
-/// @param [in] out_tensor: output description array (created by OpTensorCreate).
-/// @param [in] out_num: number of out_tensor.
-/// @param [in] attr: the attributes of the op (created by OpAttrCreate).
-/// @param [in] om_file: file name for the om to save.
-/// @return 0 for success / others for fail
-///
+/**
+ * @ingroup ge
+ * @brief Generate offline model for the op.
+ * @param [in] op_type: type name of the op.
+ * @param [in] in_tensor: input description array (created by OpTensorCreate).
+ * @param [in] in_num: number of in_tensor.
+ * @param [in] out_tensor: output description array (created by OpTensorCreate).
+ * @param [in] out_num: number of out_tensor.
+ * @param [in] attr: the attributes of the op (created by OpAttrCreate).
+ * @param [in] om_file: file name for the om to save.
+ * @return 0 for success / others for fail
+ */
 Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int in_num, const OpTensor_t *out_tensor,
                           int out_num, const OpAttr_t attr, const char *om_file) {
   CHECK_PARAM_NOT_NULL(op_type);
@@ -167,15 +166,15 @@ Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int
   return generator.BuildSingleOpModel(op_desc, inputs, outputs, om_file_name);
 }
 
-///
-/// @ingroup ge
-/// @brief Create Tensor Description.
-/// @param [in] format: tensor format of the data.
-/// @param [in] datatype: tensor type of the data.
-/// @param [in] shape: tensor shape array.
-/// @param [in] num: number of shape.
-/// @return OpTensor_t for success / nullptr for fail
-///
+/**
+ * @ingroup ge
+ * @brief Create Tensor Description.
+ * @param [in] format: tensor format of the data.
+ * @param [in] datatype: tensor type of the data.
+ * @param [in] shape: tensor shape array.
+ * @param [in] num: number of shape.
+ * @return OpTensor_t for success / nullptr for fail
+ */
 OpTensor_t OpTensorCreate(int format, int datatype, const int64_t *shape, int num) {
   std::vector<int64_t> dims;
   if (shape != nullptr) {
@@ -190,13 +189,13 @@ OpTensor_t OpTensorCreate(int format, int datatype, const int64_t *shape, int nu
   return new (std::nothrow) ge::TensorDesc(ge::Shape(dims), fmt, dt);
 }
 
-///
-/// @ingroup ge
-/// @brief Destroy Tensor Description.
-/// @param [in] OpTensor_t tensor: created by OpTensorCreate.
-/// @param [out] none
-/// @return 0 for success / others for fail.
-///
+/**
+ * @ingroup ge
+ * @brief Destroy Tensor Description.
+ * @param [in] OpTensor_t tensor: created by OpTensorCreate.
+ * @param [out] none
+ * @return 0 for success / others for fail.
+ */
 Status_t OpTensorDestroy(OpTensor_t tensor) {
   ge::TensorDesc *op_tensor = CHECK_PARAM_OBJECT(ge::TensorDesc, tensor);
   delete op_tensor;
@@ -205,22 +204,22 @@ Status_t OpTensorDestroy(OpTensor_t tensor) {
   return ge::SUCCESS;
 }
 
-///
-/// @ingroup ge
-/// @brief Create an attribute holder.
-/// @param [in] none
-/// @param [out] none
-/// @return OpAttr_t for success / nullptr for fail.
-///
+/**
+ * @ingroup ge
+ * @brief Create an attribute holder.
+ * @param [in] none
+ * @param [out] none
+ * @return OpAttr_t for success / nullptr for fail.
+ */
 OpAttr_t OpAttrCreate() { return new (std::nothrow) OpAttr; }
 
-///
-/// @ingroup ge
-/// @brief Destroy Attribute holder.
-/// @param [in] OpAttr_t attr: created by OpAttrCreate.
-/// @param [out] none
-/// @return 0 for success / others for fail.
-///
+/**
+ * @ingroup ge
+ * @brief Destroy Attribute holder.
+ * @param [in] OpAttr_t attr: created by OpAttrCreate.
+ * @param [out] none
+ * @return 0 for success / others for fail.
+ */
 Status_t OpAttrDestroy(OpAttr_t attr) {
   OpAttr *op_attr = CHECK_PARAM_OBJECT(OpAttr, attr);
   delete op_attr;
@@ -228,14 +227,14 @@ Status_t OpAttrDestroy(OpAttr_t attr) {
   return ge::SUCCESS;
 }
 
-///
-/// @ingroup ge
-/// @brief Set a boolean attribute to the attribute holder.
-/// @param [in] attr: attribute holder (created by OpAttrCreate).
-/// @param [in] name: attribute name (can`t be nullptr, end with '\0').
-/// @param [in] value: attribute value.
-/// @return 0 for success / others for fail.
-///
+/**
+ * @ingroup ge
+ * @brief Set a boolean attribute to the attribute holder.
+ * @param [in] attr: attribute holder (created by OpAttrCreate).
+ * @param [in] name: attribute name (can`t be nullptr, end with '\0').
+ * @param [in] value: attribute value.
+ * @return 0 for success / others for fail.
+ */
 Status_t SetAttrBool(OpAttr_t attr, const char *name, bool value) {
   CHECK_PARAM_NOT_NULL(name);
   OpAttr *op_attr = CHECK_PARAM_OBJECT(OpAttr, attr);
@@ -243,14 +242,14 @@ Status_t SetAttrBool(OpAttr_t attr, const char *name, bool value) {
   return op_attr->SetAttr(name, value);
 }
 
-///
-/// @ingroup ge
-/// @brief Set an integer attribute to the attribute holder.
-/// @param [in] attr: attribute holder (created by OpAttrCreate).
-/// @param [in] name: attribute name (can`t be nullptr, end with '\0').
-/// @param [in] value: attribute value.
-/// @return 0 for success / others for fail.
-///
+/**
+ * @ingroup ge
+ * @brief Set an integer attribute to the attribute holder.
+ * @param [in] attr: attribute holder (created by OpAttrCreate).
+ * @param [in] name: attribute name (can`t be nullptr, end with '\0').
+ * @param [in] value: attribute value.
+ * @return 0 for success / others for fail.
+ */
 Status_t SetAttrInt(OpAttr_t attr, const char *name, int64_t value) {
   CHECK_PARAM_NOT_NULL(name);
   OpAttr *op_attr = CHECK_PARAM_OBJECT(OpAttr, attr);
@@ -258,14 +257,14 @@ Status_t SetAttrInt(OpAttr_t attr, const char *name, int64_t value) {
   return op_attr->SetAttr(name, value);
 }
 
-///
-/// @ingroup ge
-/// @brief Set a float attribute to the attribute holder.
-/// @param [in] attr: attribute holder (created by OpAttrCreate).
-/// @param [in] name: attribute name (can`t be nullptr, end with '\0').
-/// @param [in] value: attribute value.
-/// @return 0 for success / others for fail.
-///
+/**
+ * @ingroup ge
+ * @brief Set a float attribute to the attribute holder.
+ * @param [in] attr: attribute holder (created by OpAttrCreate).
+ * @param [in] name: attribute name (can`t be nullptr, end with '\0').
+ * @param [in] value: attribute value.
+ * @return 0 for success / others for fail.
+ */
 Status_t SetAttrFloat(OpAttr_t attr, const char *name, float value) {
   CHECK_PARAM_NOT_NULL(name);
   OpAttr *op_attr = CHECK_PARAM_OBJECT(OpAttr, attr);
@@ -273,14 +272,14 @@ Status_t SetAttrFloat(OpAttr_t attr, const char *name, float value) {
   return op_attr->SetAttr(name, value);
 }
 
-///
-/// @ingroup ge
-/// @brief Set a string attribute to the attribute holder.
-/// @param [in] attr: attribute holder (created by OpAttrCreate).
-/// @param [in] name: attribute name (can`t be nullptr, end with '\0').
-/// @param [in] value: attribute value (can`t be nullptr, end with '\0').
-/// @return 0 for success / others for fail.
-///
+/**
+ * @ingroup ge
+ * @brief Set a string attribute to the attribute holder.
+ * @param [in] attr: attribute holder (created by OpAttrCreate).
+ * @param [in] name: attribute name (can`t be nullptr, end with '\0').
+ * @param [in] value: attribute value (can`t be nullptr, end with '\0').
+ * @return 0 for success / others for fail.
+ */
 Status_t SetAttrString(OpAttr_t attr, const char *name, const char *value) {
   CHECK_PARAM_NOT_NULL(name);
   CHECK_PARAM_NOT_NULL(value);
@@ -289,15 +288,15 @@ Status_t SetAttrString(OpAttr_t attr, const char *name, const char *value) {
   return op_attr->SetAttr(name, string(value));
 }
 
-///
-/// @ingroup ge
-/// @brief Set a boolean array attribute to the attribute holder.
-/// @param [in] attr: attribute holder (created by OpAttrCreate).
-/// @param [in] name: attribute name (can`t be nullptr, end with '\0').
-/// @param [in] value: attribute value array.
-/// @param [in] num: number of value array.
-/// @return 0 for success / others for fail.
-///
+/**
+ * @ingroup ge
+ * @brief Set a boolean array attribute to the attribute holder.
+ * @param [in] attr: attribute holder (created by OpAttrCreate).
+ * @param [in] name: attribute name (can`t be nullptr, end with '\0').
+ * @param [in] value: attribute value array.
+ * @param [in] num: number of value array.
+ * @return 0 for success / others for fail.
+ */
 Status_t SetAttrBoolList(OpAttr_t attr, const char *name, const bool *value, int num) {
   CHECK_PARAM_NOT_NULL(name);
   CHECK_PARAM_NOT_NULL(value);
@@ -305,15 +304,16 @@ Status_t SetAttrBoolList(OpAttr_t attr, const char *name, const bool *value, int
   return op_attr->SetAttr(name, value, num);
 }
 
-///
-/// @ingroup ge
-/// @brief Set an integer array attribute to the attribute holder.
-/// @param [in] attr: attribute holder (created by OpAttrCreate).
-/// @param [in] name: attribute name (can`t be nullptr, end with '\0').
-/// @param [in] value: attribute value array.
-/// @param [in] num: number of value array.
-/// @return 0 for success / others for fail.
-///
+
+/**
+ * @ingroup ge
+ * @brief Set an integer array attribute to the attribute holder.
+ * @param [in] attr: attribute holder (created by OpAttrCreate).
+ * @param [in] name: attribute name (can`t be nullptr, end with '\0').
+ * @param [in] value: attribute value array.
+ * @param [in] num: number of value array.
+ * @return 0 for success / others for fail.
+ */
 Status_t SetAttrIntList(OpAttr_t attr, const char *name, const int64_t *value, int num) {
   CHECK_PARAM_NOT_NULL(name);
   CHECK_PARAM_NOT_NULL(value);
@@ -322,15 +322,15 @@ Status_t SetAttrIntList(OpAttr_t attr, const char *name, const int64_t *value, i
   return op_attr->SetAttr(name, value, num);
 }
 
-///
-/// @ingroup ge
-/// @brief Set a float array attribute to the attribute holder.
-/// @param [in] attr: attribute holder (created by OpAttrCreate).
-/// @param [in] name: attribute name (can`t be nullptr, end with '\0').
-/// @param [in] value: attribute value array.
-/// @param [in] num: number of value array.
-/// @return 0 for success / others for fail.
-///
+/**
+ * @ingroup ge
+ * @brief Set a float array attribute to the attribute holder.
+ * @param [in] attr: attribute holder (created by OpAttrCreate).
+ * @param [in] name: attribute name (can`t be nullptr, end with '\0').
+ * @param [in] value: attribute value array.
+ * @param [in] num: number of value array.
+ * @return 0 for success / others for fail.
+ */
 Status_t SetAttrFloatList(OpAttr_t attr, const char *name, const float *value, int num) {
   CHECK_PARAM_NOT_NULL(name);
   CHECK_PARAM_NOT_NULL(value);
@@ -339,15 +339,15 @@ Status_t SetAttrFloatList(OpAttr_t attr, const char *name, const float *value, i
   return op_attr->SetAttr(name, value, num);
 }
 
-///
-/// @ingroup ge
-/// @brief Set a string array attribute to the attribute holder.
-/// @param [in] attr: attribute holder (created by OpAttrCreate).
-/// @param [in] name: attribute name (can`t be nullptr, end with '\0').
-/// @param [in] value: attribute value array (each value can`t be nullptr, end with '\0').
-/// @param [in] num: number of value array.
-/// @return 0 for success / others for fail.
-///
+/**
+ * @ingroup ge
+ * @brief Set a string array attribute to the attribute holder.
+ * @param [in] attr: attribute holder (created by OpAttrCreate).
+ * @param [in] name: attribute name (can`t be nullptr, end with '\0').
+ * @param [in] value: attribute value array (each value can`t be nullptr, end with '\0').
+ * @param [in] num: number of value array.
+ * @return 0 for success / others for fail.
+ */
 Status_t SetAttrStringList(OpAttr_t attr, const char *name, const char **value, int num) {
   CHECK_PARAM_NOT_NULL(name);
   CHECK_PARAM_NOT_NULL(value);
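The comment-style changes above (/// blocks rewritten as /** ... */) do not alter the documented C API itself. For orientation, a minimal usage sketch of that API follows; every literal here (shape, format/datatype codes, attribute name, op type, om file name) is invented for illustration, and error handling is elided:

  int64_t dims[4] = {1, 3, 224, 224};             // hypothetical shape
  OpTensor_t in = OpTensorCreate(0, 0, dims, 4);  // 0/0: placeholder format/datatype codes
  OpTensor_t out = OpTensorCreate(0, 0, dims, 4);
  OpAttr_t attr = OpAttrCreate();
  (void)SetAttrBool(attr, "some_flag", true);     // attribute name is made up
  Status_t ret = OpTaskGernerator("SomeOp", &in, 1, &out, 1, attr, "some_op.om");
  (void)OpAttrDestroy(attr);
  (void)OpTensorDestroy(in);
  (void)OpTensorDestroy(out);

Create the tensor descriptions and the attribute holder first, generate the om file, then release everything, matching the create/destroy pairs documented above.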
diff --git a/src/ge/graph/build/graph_build.cc b/src/ge/graph/build/graph_build.cc
index 25fc45cc..7b4813fa 100644
--- a/src/ge/graph/build/graph_build.cc
+++ b/src/ge/graph/build/graph_build.cc
@@ -15,7 +15,6 @@
  */
 
 #include "graph/build/graph_build.h"
-
 #include "common/ge/ge_util.h"
 #include "common/helper/model_helper.h"
 #include "common/opskernel/ops_kernel_info_types.h"
@@ -199,8 +198,8 @@ Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr
   auto *get_var_mem_base = reinterpret_cast<uint8_t *>(ge::VarManager::Instance(0)->GetVarMemLogicBase());
   uint64_t var_size = (ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) > 0)
-                      ? ge::VarManager::Instance(0)->GetVarMemMaxSize()
-                      : 0;
+                        ? ge::VarManager::Instance(0)->GetVarMemMaxSize()
+                        : 0;
 
   TaskGenerator task_generator(get_var_mem_base, var_size);
   ret = task_generator.GetTaskInfo(*model_ptr, comp_graph, session_id, run_context.GetRunContext());
@@ -209,7 +208,6 @@ Status GraphBuilder::GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr
 
 Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) {
   // set input_desc.size = src_node.output_desc.size
-  GELOGI("Start to set input desc size.");
   for (const auto &in_data_anchor : node_ptr->GetAllInDataAnchors()) {
     const auto &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
     GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
@@ -223,18 +221,18 @@ Status GraphBuilder::SetInputSize(const ge::NodePtr &node_ptr) {
     uint32_t size = 0;
     GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(desc_temp, size) != SUCCESS, GELOGI("Get size failed!"));
 
-    GELOGI("src node %s output desc, dim_size: %zu, mem_size: %u, format: %s, type: %s.", src_node->GetName().c_str(),
+    GELOGD("src node %s output desc, dim_size: %zu, mem_size: %u, format: %s, type: %s.", src_node->GetName().c_str(),
            desc_temp.GetShape().GetDimNum(), size, TypeUtils::FormatToSerialString(desc_temp.GetFormat()).c_str(),
            TypeUtils::DataTypeToSerialString(desc_temp.GetDataType()).c_str());
     for (size_t i = 0; i < desc_temp.GetShape().GetDimNum(); ++i) {
-      GELOGI("dims[%zu]: %ld", i, desc_temp.GetShape().GetDim(i));
+      GELOGD("dims[%zu]: %ld", i, desc_temp.GetShape().GetDim(i));
     }
 
     auto input_desc = node_op_desc->GetInputDescPtr(in_data_anchor->GetIdx());
     GE_CHECK_NOTNULL(input_desc);
     ge::TensorUtils::SetSize(const_cast<GeTensorDesc &>(*input_desc), size);
     GE_CHK_STATUS_RET(node_op_desc->UpdateInputDesc(in_data_anchor->GetIdx(), *input_desc));
-    GELOGI("%s input desc, dim_size: %zu, mem_size: %u, format: %s, type: %s.", node_ptr->GetName().c_str(),
+    GELOGD("%s input desc, dim_size: %zu, mem_size: %u, format: %s, type: %s.", node_ptr->GetName().c_str(),
            input_desc->GetShape().GetDimNum(), size, TypeUtils::FormatToSerialString(input_desc->GetFormat()).c_str(),
            TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str());
   }
diff --git a/src/ge/graph/build/graph_build.h b/src/ge/graph/build/graph_build.h
index b6f521e5..b7ceec62 100644
--- a/src/ge/graph/build/graph_build.h
+++ b/src/ge/graph/build/graph_build.h
@@ -16,14 +16,12 @@
 #ifndef GE_GRAPH_BUILD_GRAPH_BUILD_H_
 #define GE_GRAPH_BUILD_GRAPH_BUILD_H_
-
 #include
 #include
 #include
 #include
 #include
 #include
-
 #include "common/debug/log.h"
 #include "common/debug/memory_dumper.h"
 #include "common/properties_manager.h"
@@ -48,15 +46,14 @@ class GraphBuilder {
   GraphBuilder(const GraphBuilder &in) = delete;
   GraphBuilder &operator=(const GraphBuilder &in) = delete;
   virtual ~GraphBuilder() = default;
-  Status Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
-               GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID);
+  Status Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, GeModelPtr &ge_model_ptr,
+               uint64_t session_id = INVALID_SESSION_ID);
   void SetOptions(const GraphManagerOptions &options);
 
  private:
   Status CalcOpParam(const ge::ComputeGraphPtr &graph);
-  Status GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr &model_ptr,
-                     ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
-                     uint64_t session_id = INVALID_SESSION_ID);
+  Status GetTaskInfo(const ge::ModelBuilder &builder, const ModelPtr &model_ptr, ComputeGraphPtr &comp_graph,
+                     std::vector<SubGraphInfoPtr> &subgraph_ptr_list, uint64_t session_id = INVALID_SESSION_ID);
   Status SetInputSize(const ge::NodePtr &node_ptr);
   Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list);
diff --git a/src/ge/graph/build/logical_stream_allocator.cc b/src/ge/graph/build/logical_stream_allocator.cc
index c9c9c008..509f591f 100644
--- a/src/ge/graph/build/logical_stream_allocator.cc
+++ b/src/ge/graph/build/logical_stream_allocator.cc
@@ -19,12 +19,13 @@
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/fmk_error_codes.h"
 #include "framework/common/types.h"
+#include "graph/utils/graph_utils.h"
 #include "graph/debug/ge_attr_define.h"
 
-using std::string;
-using std::vector;
 using std::map;
 using std::set;
+using std::string;
+using std::vector;
 
 namespace ge {
 LogicalStreamPass::LogicalStreamPass(const string &name) : name_(name) {}
@@ -207,8 +208,8 @@ bool AssignByDependencyPass::CouldReuse(const SubgraphPtr &subgraph, const Subgr
 }
 
 LogicalStreamPass::SubgraphPtr AssignByDependencyPass::GetReusableSubgraph(
-    const SubgraphPtr &subgraph, const map<NodePtr, SubgraphPtr> &end_subgraph_map,
-    const map<NodePtr, SubgraphPtr> &pld_subgraph_map) {
+  const SubgraphPtr &subgraph, const map<NodePtr, SubgraphPtr> &end_subgraph_map,
+  const map<NodePtr, SubgraphPtr> &pld_subgraph_map) {
   const SubGraphInfo &subgraph_info = subgraph->subgraph_info;
   for (const auto &pld_2_end : subgraph_info.GetPld2EndMap()) {
     const NodePtr &peer_end = pld_2_end.second;
@@ -265,7 +266,6 @@ void AssignByDependencyPass::UpdateAssignedSubgraphs(Context &context) {
   // Update the subgraphs assigned by the engine.
   for (auto &subgraph : assigned_subgraphs_) {
     subgraph->stream_id += engine_start_streams[subgraph->engine_conf.id];
-    GELOGI("Stream of subgraph %s has been updated to %ld.", subgraph->name.c_str(), subgraph->stream_id);
   }
 }
 
@@ -308,7 +308,7 @@ Status NodeStreamUpdatePass::Run(ComputeGraphPtr whole_graph, const vector<Subgr
     for (NodePtr &node : subgraph->subgraph_info.GetSubGraph()->GetDirectNode()) {
       GE_CHECK_NOTNULL(node->GetOpDesc());
       if (IsEngineSkip(*subgraph) && node->GetInNodes().empty()) {
-        GELOGI("Node %s of type %s in subgraph %s doesn't need to assign a stream (engine: %s).",
+        GELOGD("Node %s of type %s in subgraph %s doesn't need to assign a stream (engine: %s).",
                node->GetName().c_str(), node->GetType().c_str(), subgraph->name.c_str(), engine_name.c_str());
       } else {
         node->GetOpDesc()->SetStreamId(stream_id);
@@ -328,12 +328,13 @@ Status AllReduceParallelPass::Run(ComputeGraphPtr whole_graph, const vector<Subg
 
   std::set<NodePtr> all_reduce_succs;
   for (const NodePtr &node : whole_graph->GetDirectNode()) {
-    if (node->GetType() != HCOMALLREDUCE) {
+    if (node->GetType() != HCOMALLREDUCE || node->GetInDataNodes().size() <= 1) {
       continue;
     }
diff --git a/src/ge/graph/build/memory/CMakeLists.txt b/src/ge/graph/build/memory/CMakeLists.txt
index 90bf510c..aa474dd8 100644
--- a/src/ge/graph/build/memory/CMakeLists.txt
+++ b/src/ge/graph/build/memory/CMakeLists.txt
@@ -34,7 +34,6 @@ include_directories(${GE_SOURCE_DIR}/inc/external/graph)
 include_directories(${GE_SOURCE_DIR}/inc/framework)
 include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc)
 include_directories(${GE_SOURCE_DIR}/third_party/securec/include)
-include_directories(${GE_SOURCE_DIR}/third_party/protobuf/src)
 include_directories(${CMAKE_BINARY_DIR})
 include_directories(${CMAKE_BINARY_DIR}/proto/ge)
diff --git a/src/ge/graph/build/memory/binary_block_mem_assigner.cc b/src/ge/graph/build/memory/binary_block_mem_assigner.cc
index f4312976..8668e81e 100644
--- a/src/ge/graph/build/memory/binary_block_mem_assigner.cc
+++ b/src/ge/graph/build/memory/binary_block_mem_assigner.cc
@@ -15,9 +15,7 @@
  */
 
 #include "graph/build/memory/binary_block_mem_assigner.h"
-
 #include
-
#include "framework/common/debug/ge_log.h" #include "graph/utils/type_utils.h" @@ -77,7 +75,7 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector &range_ceils) { return FAILED; } auto range_number = static_cast( - ceil(log(all_memory_size.back() / static_cast(all_memory_size.front())) / log(kLogBase))); + ceil(log(all_memory_size.back() / static_cast(all_memory_size.front())) / log(kLogBase))); range_number = (range_number == 0) ? 1 : range_number; GELOGI("Range number: %zu", range_number); diff --git a/src/ge/graph/build/memory/binary_block_mem_assigner.h b/src/ge/graph/build/memory/binary_block_mem_assigner.h index 3c94cdc7..678a8adf 100644 --- a/src/ge/graph/build/memory/binary_block_mem_assigner.h +++ b/src/ge/graph/build/memory/binary_block_mem_assigner.h @@ -19,7 +19,6 @@ #include #include - #include "graph/build/memory/block_mem_assigner.h" namespace ge { diff --git a/src/ge/graph/build/memory/block_mem_assigner.cc b/src/ge/graph/build/memory/block_mem_assigner.cc index 759fe0c1..42e03839 100644 --- a/src/ge/graph/build/memory/block_mem_assigner.cc +++ b/src/ge/graph/build/memory/block_mem_assigner.cc @@ -268,8 +268,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, } auto op_type = node_op_desc->GetType(); bool is_reuse_memory = !out_flg && reuse_mem_flag && (op_type != DATA_TYPE) && (op_type != AIPP_DATA_TYPE) && - (op_type != CONSTANT) && (op_type != NETOUTPUT) && (op_type != PROPOSAL) && - (op_type != ANN_DATA_TYPE) && (op_type != ZEROSLIKE) && (op_type != CONSTANTOP); + (op_type != CONSTANT) && (op_type != NETOUTPUT) && (op_type != PROPOSAL) && + (op_type != ANN_DATA_TYPE) && (op_type != ZEROSLIKE) && (op_type != CONSTANTOP); auto stream_id = node_op_desc->GetStreamId(); auto map_iter = reusable_streams_map_.find(stream_id); if (is_reuse_memory && map_iter != reusable_streams_map_.end()) { @@ -278,10 +278,10 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, bool is_data = false; for (auto node_type : reusable_block->NodeTypeIndexList()) { GE_IF_BOOL_EXEC(node_type.node_ != nullptr, string type = node_type.node_->GetType(); - bool flag = (type == DATA_TYPE) || (type == ENTER) || (type == REFENTER) || - (type == AIPP_DATA_TYPE) || (type == NEXTITERATION) || - (type == REFNEXTITERATION); - GE_IF_BOOL_EXEC(flag, is_data = true; break;);); + bool flag = (type == DATA_TYPE) || (type == ENTER) || (type == REFENTER) || + (type == AIPP_DATA_TYPE) || (type == NEXTITERATION) || + (type == REFNEXTITERATION); + GE_IF_BOOL_EXEC(flag, is_data = true; break;);); } GE_IF_BOOL_EXEC(is_data == true, continue); @@ -337,9 +337,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, auto reuse_src_node = peer_out_anchor->GetOwnerNode(); auto reuse_src_node_output_index = static_cast(peer_out_anchor->GetIdx()); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - (node_out_blocks_.empty() || - (node_out_blocks_[reuse_src_node->GetName()].size() <= reuse_src_node_output_index)), - return nullptr, "node_out_block of node_out_block[reuse_src_node->Name()] is empty!"); + (node_out_blocks_.empty() || (node_out_blocks_[reuse_src_node->GetName()].size() <= reuse_src_node_output_index)), + return nullptr, "node_out_block of node_out_block[reuse_src_node->Name()] is empty!"); block = node_out_blocks_[reuse_src_node->GetName()][reuse_src_node_output_index]; } else { auto block_size = GetBlockSize(size, ranges); @@ -347,10 +346,11 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, block = 
diff --git a/src/ge/graph/build/memory/binary_block_mem_assigner.h b/src/ge/graph/build/memory/binary_block_mem_assigner.h
index 3c94cdc7..678a8adf 100644
--- a/src/ge/graph/build/memory/binary_block_mem_assigner.h
+++ b/src/ge/graph/build/memory/binary_block_mem_assigner.h
@@ -19,7 +19,6 @@
 
 #include
 #include
-
 #include "graph/build/memory/block_mem_assigner.h"
 
 namespace ge {
diff --git a/src/ge/graph/build/memory/block_mem_assigner.cc b/src/ge/graph/build/memory/block_mem_assigner.cc
index 759fe0c1..42e03839 100644
--- a/src/ge/graph/build/memory/block_mem_assigner.cc
+++ b/src/ge/graph/build/memory/block_mem_assigner.cc
@@ -268,8 +268,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
   }
   auto op_type = node_op_desc->GetType();
   bool is_reuse_memory = !out_flg && reuse_mem_flag && (op_type != DATA_TYPE) && (op_type != AIPP_DATA_TYPE) &&
-                         (op_type != CONSTANT) && (op_type != NETOUTPUT) && (op_type != PROPOSAL) &&
-                         (op_type != ANN_DATA_TYPE) && (op_type != ZEROSLIKE) && (op_type != CONSTANTOP);
+    (op_type != CONSTANT) && (op_type != NETOUTPUT) && (op_type != PROPOSAL) &&
+    (op_type != ANN_DATA_TYPE) && (op_type != ZEROSLIKE) && (op_type != CONSTANTOP);
   auto stream_id = node_op_desc->GetStreamId();
   auto map_iter = reusable_streams_map_.find(stream_id);
   if (is_reuse_memory && map_iter != reusable_streams_map_.end()) {
@@ -278,10 +278,10 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
       bool is_data = false;
       for (auto node_type : reusable_block->NodeTypeIndexList()) {
         GE_IF_BOOL_EXEC(node_type.node_ != nullptr, string type = node_type.node_->GetType();
-                        bool flag = (type == DATA_TYPE) || (type == ENTER) || (type == REFENTER) ||
-                                    (type == AIPP_DATA_TYPE) || (type == NEXTITERATION) ||
-                                    (type == REFNEXTITERATION);
-                        GE_IF_BOOL_EXEC(flag, is_data = true; break;););
+          bool flag = (type == DATA_TYPE) || (type == ENTER) || (type == REFENTER) ||
+                      (type == AIPP_DATA_TYPE) || (type == NEXTITERATION) ||
+                      (type == REFNEXTITERATION);
+          GE_IF_BOOL_EXEC(flag, is_data = true; break;););
       }
       GE_IF_BOOL_EXEC(is_data == true, continue);
 
@@ -337,9 +337,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
     auto reuse_src_node = peer_out_anchor->GetOwnerNode();
     auto reuse_src_node_output_index = static_cast<uint32_t>(peer_out_anchor->GetIdx());
     GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
-      (node_out_blocks_.empty() ||
-       (node_out_blocks_[reuse_src_node->GetName()].size() <= reuse_src_node_output_index)),
-      return nullptr, "node_out_block of node_out_block[reuse_src_node->Name()] is empty!");
+      (node_out_blocks_.empty() || (node_out_blocks_[reuse_src_node->GetName()].size() <= reuse_src_node_output_index)),
+      return nullptr, "node_out_block of node_out_block[reuse_src_node->Name()] is empty!");
     block = node_out_blocks_[reuse_src_node->GetName()][reuse_src_node_output_index];
   } else {
     auto block_size = GetBlockSize(size, ranges);
     block = ApplyMemory(block_size, size, kOutput, n, index, workspace_reuse_flag);
   }
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr.");
+  int out_count_reuse_input = block->ref_count_;
+  int out_count = 0;
   GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), GELOGE(FAILED, "index is out of range."); return nullptr);
   auto out_data_anchor = n->GetOutDataAnchor(index);
   GE_IF_BOOL_EXEC(out_data_anchor == nullptr, GELOGE(FAILED, "Out data anchor is nullptr."); return nullptr);
-  int out_count = 0;
   for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) {
     auto owner_node = in_anchor->GetOwnerNode();
     auto op_desc = owner_node->GetOpDesc();
@@ -361,7 +361,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
       out_count++;
     }
   }
-  int out_count_reuse_input = block->ref_count_;
   for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) {
     auto owner_node = in_anchor->GetOwnerNode();
     GE_IF_BOOL_EXEC(owner_node == nullptr, continue);
@@ -425,8 +424,7 @@ void BlockMemAssigner::ReleaseInputNodeOutMemory(const NodePtr &n,
                                                  vector<MemoryBlock *> &reusable_memory) {
   for (const auto &in_anchor : n->GetAllInDataAnchors()) {
     if ((in_anchor->GetPeerOutAnchor() == nullptr) ||
-        (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr) ||
-        (n->GetOpDesc() == nullptr)) {
+        (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr) || (n->GetOpDesc() == nullptr)) {
       return;
     }
     GE_IF_BOOL_EXEC(IsOutputBlock(in_anchor), continue);
@@ -434,8 +432,8 @@
     auto node_name = in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetName();
 
     GE_IF_BOOL_EXEC((in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetType() == CONSTANT) ||
-                    (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetType() == FASTRCNNPREDICTIONS) ||
-                    (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetType() == CONSTANTOP),
+                      (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetType() == FASTRCNNPREDICTIONS) ||
+                      (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetType() == CONSTANTOP),
                     continue);
 
     auto it = node_out_blocks.find(node_name);
@@ -506,17 +504,17 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
     GetNodeWorkSpaceSize(n, temp);
     vector<bool> workspace_reuse_flag;
     GE_IF_BOOL_EXEC(!ge::AttrUtils::GetListBool(node_op_desc, kAttrNameWorkspaceReuseFlag, workspace_reuse_flag),
-                    GELOGI("OP %s get workspace_reuse_flag attr failed", node_op_desc->GetName().c_str()));
+                    GELOGD("OP %s get workspace_reuse_flag attr failed", node_op_desc->GetName().c_str()));
     for (size_t i = 0; i < temp.size(); i++) {
       if (temp[i] == 0) {
         zero_memory_list_.emplace_back(n, kWorkspace, static_cast<uint32_t>(i));
         continue;
       }
       MemoryBlock *mem_block =
-          ApplyMemory(GetBlockSize(static_cast<size_t>(temp[i]), ranges), static_cast<size_t>(temp[i]), kWorkspace, n,
-                      static_cast<uint32_t>(i), workspace_reuse_flag);
+        ApplyMemory(GetBlockSize(static_cast<size_t>(temp[i]), ranges), static_cast<size_t>(temp[i]), kWorkspace, n,
+                    static_cast<uint32_t>(i), workspace_reuse_flag);
       GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block.");
-      stream_workspace_blocks_[stream_id].emplace_back(mem_block);
+      CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block);
     }
     ReleaseInputNodeOutMemory(n, node_out_blocks_, reusable_blocks_);
   }
@@ -537,6 +535,15 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
   }
 }
 
+void BlockMemAssigner::CheckWorkspaceReuse(const vector<bool> &workspace_reuse_flag, uint32_t index, int64_t stream_id,
+                                           MemoryBlock *mem_block) {
+  bool reuse_mem_flag =
+    ((workspace_reuse_flag.size() > index) && (workspace_reuse_flag[index] == false)) ? false : true;
+  if (reuse_mem_flag) {
+    stream_workspace_blocks_[stream_id].emplace_back(mem_block);
+  }
+}
+
 void BlockMemAssigner::GetNodeWorkSpaceSize(const NodePtr &node, vector<int64_t> &workspace_memory) {
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node->GetOpDesc() == nullptr, return, "Op desc is null.");
   vector<int64_t> workspace_byte_nums = node->GetOpDesc()->GetWorkspaceBytes();
@@ -632,9 +639,11 @@ void BlockMemAssigner::ResizeMemoryBlocks() {
 /// @brief given NodeTypeIndex, set offset in Op's OpDef
 /// @param [in&out] node_type_index
 /// @param [in] offset offset to be set
+/// @param [in] size memory size
+/// @param [in] real_size memory size in need
 /// @return Status result
 ///
-void SetOffsetSize(const NodeTypeIndex &node_type_index, int64_t offset) {
+void SetOffsetSize(const NodeTypeIndex &node_type_index, int64_t offset, size_t size, size_t real_size) {
   ge::OpDescPtr op_desc = node_type_index.node_->GetOpDesc();
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(op_desc == nullptr, return, "op_desc is null.");
   if (node_type_index.mem_type_ == kOutput) {
@@ -673,12 +682,19 @@ void BlockMemAssigner::SetOpMemOffset() {
     if (memory_block == nullptr || memory_block->deleted_block_) {
       continue;
    }
+    size_t index = 0;
+    size_t real_size = 0;
+    auto real_size_list_size = memory_block->RealSizeList().size();
     for (const NodeTypeIndex &node_type_index : memory_block->NodeTypeIndexList()) {
-      SetOffsetSize(node_type_index, memory_block->HeadOffset());
+      if (index < real_size_list_size) {
+        real_size = memory_block->RealSizeList()[index];
+      }
+      SetOffsetSize(node_type_index, memory_block->HeadOffset(), memory_block->Size(), real_size);
+      index++;
     }
   }
   for (const NodeTypeIndex &node_type_index : zero_memory_list_) {
-    SetOffsetSize(node_type_index, 0);
+    SetOffsetSize(node_type_index, 0, 0, 0);
   }
 }
 
@@ -783,7 +799,7 @@ void BlockMemAssigner::FindDependentStream(map<int64_t, set<int64_t>>
 bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const {
   return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) ||
-         (node_type == HCOMBROADCAST) || (node_type == HCOMALLREDUCE) || (node_type == CONSTANTOP) ||
-         (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN);
+    (node_type == HCOMBROADCAST) || (node_type == HCOMALLREDUCE) || (node_type == CONSTANTOP) ||
+    (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN);
 }
 }  // namespace ge
diff --git a/src/ge/graph/build/memory/block_mem_assigner.h b/src/ge/graph/build/memory/block_mem_assigner.h
index 4a019137..4a826cfc 100644
--- a/src/ge/graph/build/memory/block_mem_assigner.h
+++ b/src/ge/graph/build/memory/block_mem_assigner.h
@@ -23,7 +23,6 @@
 #include
 #include
 #include
 #include
 #include
-
 #include "common/ge_inner_error_codes.h"
 #include "common/types.h"
 #include "common/util.h"
@@ -218,6 +217,19 @@ class BlockMemAssigner : public MemAssigner {
 
   ///
   /// @ingroup GE
+  /// @brief check workspace_reuse_flag to judge whether to add the workspace block to the reuse list
+  /// @param [in] workspace_reuse_flag marks whether each output index supports reuse
+  /// @param [in] index out index
+  /// @param [in] stream_id which stream the op is in
+  /// @param [in] mem_block node workspace mem_block
+  /// @return void
+  /// @author
+  ///
+  void CheckWorkspaceReuse(const vector<bool> &workspace_reuse_flag, uint32_t index, int64_t stream_id,
+                           MemoryBlock *mem_block);
+
+  ///
+  /// @ingroup GE
   /// @brief Release memory block to reusable list
   /// @param [in] to_release memory block to be released
   /// @param [in] reusable_memory reusable list
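The double negative in the new CheckWorkspaceReuse body (see the block_mem_assigner.cc hunk above) reads more naturally as: queue the workspace block for later reuse unless workspace_reuse_flag explicitly opts this index out. An equivalent sketch, for illustration only:

  bool opt_out = (workspace_reuse_flag.size() > index) && !workspace_reuse_flag[index];
  if (!opt_out) {
    stream_workspace_blocks_[stream_id].emplace_back(mem_block);
  }

Indices beyond the flag vector's size default to reusable, which matches the previous unconditional emplace_back behavior.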
diff --git a/src/ge/graph/build/memory/graph_mem_assigner.cc b/src/ge/graph/build/memory/graph_mem_assigner.cc
index 9da8b006..815c8e39 100644
--- a/src/ge/graph/build/memory/graph_mem_assigner.cc
+++ b/src/ge/graph/build/memory/graph_mem_assigner.cc
@@ -15,11 +15,8 @@
  */
 
 #include "graph/build/memory/graph_mem_assigner.h"
-
 #include
-
 #include
-
 #include "framework/common/debug/ge_log.h"
 #include "graph/build/memory/hybrid_mem_assigner.h"
 #include "graph/build/memory/var_mem_assign_util.h"
@@ -71,7 +68,7 @@ Status GraphMemoryAssigner::AssignMemory() {
   auto session_id = compute_graph_->GetSessionID();
   int64_t var_size_before_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM);
   auto variable_assigner =
-      std::unique_ptr<ge::VariableMemoryAssigner>(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
+    std::unique_ptr<ge::VariableMemoryAssigner>(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
   if (variable_assigner == nullptr) {
     GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
     return ge::FAILED;
@@ -87,7 +84,7 @@ Status GraphMemoryAssigner::AssignMemory() {
 
 ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() {
   auto variable_assigner =
-      std::unique_ptr<ge::VariableMemoryAssigner>(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
+    std::unique_ptr<ge::VariableMemoryAssigner>(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
   if (variable_assigner == nullptr) {
     GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
     return ge::FAILED;
@@ -198,6 +195,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node)
   GELOGI("Current node %s needs continuous input.", node->GetName().c_str());
   for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
     auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
+
     if (peer_out_data_anchor == nullptr) {
       continue;
     }
@@ -239,7 +237,6 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node)
       return ge::FAILED;
     }
     peer_op_desc->SetOutputOffset(output_list);
-
     uint32_t tensor_desc_size = 0;
     if (ge::TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())),
                                  tensor_desc_size) != ge::SUCCESS) {
@@ -268,7 +265,6 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node
   for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
     output_list[out_data_anchor->GetIdx()] = memory_offset_[0].mem_offset_;
-
     uint32_t tensor_desc_size = 0;
     if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())),
                                  tensor_desc_size) != ge::SUCCESS) {
@@ -291,8 +287,8 @@ Status GraphMemoryAssigner::ReAssignVirtualConcatMemory() {
     if (n->GetOpDesc()->GetType() == CONCAT) {
       int64_t is_node_virtual;
       GE_IF_BOOL_EXEC(
-          !(ge::AttrUtils::GetInt(n->GetOpDesc(), "fusion_virtual_op", is_node_virtual)),  // Need to change
-          continue;);
+        !(ge::AttrUtils::GetInt(n->GetOpDesc(), "fusion_virtual_op", is_node_virtual)),  // Need to change
+        continue;);
       vector<int64_t> output_list = n->GetOpDesc()->GetOutputOffset();
       if (output_list.empty()) {
         GELOGE(FAILED, "Outputoffset is empty node name:%s", n->GetName().c_str());
@@ -318,7 +314,6 @@ Status GraphMemoryAssigner::ReAssignVirtualConcatMemory() {
           return ge::FAILED;
         }
         peer_op_desc->SetOutputOffset(output_offsets);
-
         ge::ConstGeTensorDescPtr output_desc = peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx());
         GE_CHECK_NOTNULL(output_desc);
         int64_t output_mem_size = 0;
@@ -328,7 +323,7 @@ Status GraphMemoryAssigner::ReAssignVirtualConcatMemory() {
         Format format = output_desc->GetFormat();
         DataType data_type = output_desc->GetDataType();
         graphStatus graph_status =
         DataType data_type = output_desc->GetDataType();
         graphStatus graph_status =
-            TensorUtils::CalcTensorMemSize(output_shape, format, data_type, output_mem_size);
+          TensorUtils::CalcTensorMemSize(output_shape, format, data_type, output_mem_size);
         if (graph_status != GRAPH_SUCCESS) {
           GELOGE(graph_status, "CalcTensorMemSize failed!");
           return FAILED;
         }
@@ -516,9 +511,6 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
 }
 
 Status GraphMemoryAssigner::AssignReferenceMemory(const ge::NodePtr &node) {
-  GELOGI("Current node %s needs to support the reference relationship between output and input.",
-         node->GetName().c_str());
-
   auto out_op_desc = node->GetOpDesc();
   GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
   vector<int64_t> output_list = out_op_desc->GetOutputOffset();
@@ -540,8 +532,6 @@ Status GraphMemoryAssigner::AssignReferenceMemory(const ge::NodePtr &node) {
     auto iter = input_name_index.find(out_data_anchor_name);
     if (iter != input_name_index.end()) {
       int index = iter->second;
-      GELOGI("Reference memory: input anchor index = %d, input anchor name = %s, output anchor name = %s.", index,
-             iter->first.c_str(), out_data_anchor_name.c_str());
       GE_CHECK_NOTNULL(node->GetInDataAnchor(index));
       auto peer_out_anchor = node->GetInDataAnchor(index)->GetPeerOutAnchor();
       GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
@@ -550,9 +540,6 @@ Status GraphMemoryAssigner::AssignReferenceMemory(const ge::NodePtr &node) {
       auto peer_out_op_desc = peer_out_node->GetOpDesc();
       GE_CHECK_NOTNULL(peer_out_op_desc);
       output_list[out_data_anchor->GetIdx()] = peer_out_op_desc->GetOutputOffset()[peer_out_anchor_index];
-      GELOGI("Reference output : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld]",
-             node->GetOwnerComputeGraph()->GetName().c_str(), peer_out_op_desc->GetName().c_str(),
-             out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], peer_out_op_desc->GetStreamId());
     } else {
       GELOGI("Reference output : origin %s name[%s] output[%d] offset is [%ld] stream_id[%ld]",
             node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
@@ -634,8 +621,8 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) {
     // If you have already assigned an atomic address, skip it, and you don't need to reassign it.
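// [Editor's note: illustrative sketch, not part of the patch.] The graph_mem_assigner.cc
// hunks above reflow two recurring patterns. First, the nothrow allocation guard; the
// unique_ptr template argument was stripped during extraction and is assumed to be
// ge::VariableMemoryAssigner, matching the constructor being called:
auto variable_assigner =
    std::unique_ptr<ge::VariableMemoryAssigner>(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
if (variable_assigner == nullptr) {
  GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");  // new (std::nothrow) returns nullptr instead of throwing
  return ge::FAILED;
}
// Second, the AlignMemOffset hunk below rounds mem_offset_ up to the next multiple of
// mem_align_size with the usual integer align-up formula (x + a - 1) / a * a.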
     if (is_assigned_mem) {
       GELOGI(
-          "[IMAS]Atomic output : we have assigned atomic memory as the input of next node in "
-          "ReAssignContinuousMemory function.");
+        "[IMAS]Atomic output : we have assigned atomic memory as the input of next node in "
+        "ReAssignContinuousMemory function.");
       continue;
     }
@@ -888,6 +875,6 @@ void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size) {
     return;
   }
   memory_offset_[0].mem_offset_ =
-      (memory_offset_[0].mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size;
+    (memory_offset_[0].mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size;
 }
 }  // namespace ge
diff --git a/src/ge/graph/build/memory/graph_mem_assigner.h b/src/ge/graph/build/memory/graph_mem_assigner.h
index eef8718c..0c26c070 100644
--- a/src/ge/graph/build/memory/graph_mem_assigner.h
+++ b/src/ge/graph/build/memory/graph_mem_assigner.h
@@ -23,7 +23,6 @@
 #include
 #include
 #include
-
 #include "framework/common/ge_inner_error_codes.h"
 #include "graph/node.h"
 #include "runtime/mem.h"
diff --git a/src/ge/graph/build/memory/hybrid_mem_assigner.cc b/src/ge/graph/build/memory/hybrid_mem_assigner.cc
index 15fbb312..6165494c 100644
--- a/src/ge/graph/build/memory/hybrid_mem_assigner.cc
+++ b/src/ge/graph/build/memory/hybrid_mem_assigner.cc
@@ -15,10 +15,8 @@
  */
 #include "graph/build/memory/hybrid_mem_assigner.h"
-
 #include
 #include
-
 #include "framework/common/debug/ge_log.h"
 #include "graph/build/memory/binary_block_mem_assigner.h"
 #include "graph/build/memory/max_block_mem_assigner.h"
diff --git a/src/ge/graph/build/memory/hybrid_mem_assigner.h b/src/ge/graph/build/memory/hybrid_mem_assigner.h
index e9aade09..1e78c278 100644
--- a/src/ge/graph/build/memory/hybrid_mem_assigner.h
+++ b/src/ge/graph/build/memory/hybrid_mem_assigner.h
@@ -18,9 +18,9 @@
 #define GE_GRAPH_BUILD_MEMORY_HYBRID_MEM_ASSIGNER_H_
 #include
-
 #include "graph/build/memory/mem_assigner.h"
 #include "graph/compute_graph.h"
+
 #include "common/types.h"
 #include "common/util.h"
diff --git a/src/ge/graph/build/memory/max_block_mem_assigner.cc b/src/ge/graph/build/memory/max_block_mem_assigner.cc
index c093403a..db6befeb 100644
--- a/src/ge/graph/build/memory/max_block_mem_assigner.cc
+++ b/src/ge/graph/build/memory/max_block_mem_assigner.cc
@@ -15,7 +15,6 @@
  */
 #include "graph/build/memory/max_block_mem_assigner.h"
-
 #include
 namespace ge {
diff --git a/src/ge/graph/build/memory/max_block_mem_assigner.h b/src/ge/graph/build/memory/max_block_mem_assigner.h
index 22229737..cb46880a 100644
--- a/src/ge/graph/build/memory/max_block_mem_assigner.h
+++ b/src/ge/graph/build/memory/max_block_mem_assigner.h
@@ -16,10 +16,8 @@
 #ifndef GE_GRAPH_BUILD_MEMORY_MAX_BLOCK_MEM_ASSIGNER_H_
 #define GE_GRAPH_BUILD_MEMORY_MAX_BLOCK_MEM_ASSIGNER_H_
-
 #include
 #include
-
 #include "graph/build/memory/block_mem_assigner.h"
 namespace ge {
diff --git a/src/ge/graph/build/memory/mem_assigner.h b/src/ge/graph/build/memory/mem_assigner.h
index 26cc9f26..b1cb4627 100644
--- a/src/ge/graph/build/memory/mem_assigner.h
+++ b/src/ge/graph/build/memory/mem_assigner.h
@@ -19,7 +19,6 @@
 #include "common/ge_inner_error_codes.h"
 #include "memory/memory_assigner.h"
-
 namespace ge {
 static const int64_t kInvalidOffset = -1;
diff --git a/src/ge/graph/build/memory/memory_assigner.cc b/src/ge/graph/build/memory/memory_assigner.cc index db05b500..92858b49 100644 --- a/src/ge/graph/build/memory/memory_assigner.cc +++ b/src/ge/graph/build/memory/memory_assigner.cc @@ -15,16 +15,14 @@ */ #include "memory/memory_assigner.h" - #include - #include
"framework/common/debug/ge_log.h" #include "graph/build/memory/graph_mem_assigner.h" namespace ge { Status MemoryAssigner::AssignMemory(bool is_loop_graph, size_t &mem_offset) { std::unique_ptr graph_mem_assigner(new (std::nothrow) - ge::GraphMemoryAssigner(compute_graph_)); + ge::GraphMemoryAssigner(compute_graph_)); if (graph_mem_assigner == nullptr) { GELOGE(ge::FAILED, "Alloc GraphMemoryAssigner failed."); return ge::FAILED; diff --git a/src/ge/graph/build/memory/var_mem_assign_util.cc b/src/ge/graph/build/memory/var_mem_assign_util.cc index 46e0348d..25d575c3 100644 --- a/src/ge/graph/build/memory/var_mem_assign_util.cc +++ b/src/ge/graph/build/memory/var_mem_assign_util.cc @@ -15,9 +15,7 @@ */ #include "graph/build/memory/var_mem_assign_util.h" - #include - #include "common/types.h" #include "framework/common/debug/ge_log.h" #include "graph/common/transop_util.h" @@ -56,25 +54,25 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr string ref_var_src_var_name; GE_CHECK_NOTNULL(n->GetOpDesc()); GE_IF_BOOL_EXEC(ge::AttrUtils::GetStr(n->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_src_var_name), continue); + string node_name = n->GetName(); GE_IF_BOOL_EXEC(n->GetOpDesc()->GetAllOutputsDesc().empty(), GELOGE(FAILED, "node:%s has no OutputDesc.", n->GetName().c_str()); return FAILED); ge::ConstGeTensorDescPtr tensor_desc = n->GetOpDesc()->GetOutputDescPtr(0); GE_CHECK_NOTNULL(tensor_desc); - string node_name = n->GetName(); if (!VarManager::Instance(compute_graph->GetSessionID())->IsVarExist(node_name, *tensor_desc)) { GE_CHK_STATUS_RET( - VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, RT_MEMORY_HBM)); + VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, RT_MEMORY_HBM)); GE_IF_BOOL_EXEC(n->GetType() == VARIABLE, GE_CHK_STATUS_RET(AssignData2Fp32Var(n, compute_graph->GetSessionID()))); GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) - ->SetAllocatedGraphId(node_name, compute_graph->GetGraphID())); + ->SetAllocatedGraphId(node_name, compute_graph->GetGraphID())); } uint8_t *dev_ptr = nullptr; rtMemType_t memory_type = RT_MEMORY_HBM; - GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) - ->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type)); + GE_CHK_STATUS_RET( + VarManager::Instance(compute_graph->GetSessionID())->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type)); vector output_list = n->GetOpDesc()->GetOutputOffset(); GE_IF_BOOL_EXEC(output_list.empty(), return FAILED); output_list[0] = static_cast(reinterpret_cast(dev_ptr)); @@ -92,9 +90,9 @@ Status VarMemAssignUtil::AssignData2Fp32Var(const ge::NodePtr &node, uint64_t se rtMemType_t memory_type = RT_MEMORY_HBM; GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetCurVarDesc(src_var_name, cur_tensor_desc)); GE_CHK_STATUS_RET( - VarManager::Instance(session_id)->GetVarAddr(src_var_name, cur_tensor_desc, &dev_ptr, memory_type)); + VarManager::Instance(session_id)->GetVarAddr(src_var_name, cur_tensor_desc, &dev_ptr, memory_type)); GE_CHK_STATUS_RET( - VarManager::Instance(session_id)->SetVarAddr(node->GetName(), cur_tensor_desc, dev_ptr, memory_type)); + VarManager::Instance(session_id)->SetVarAddr(node->GetName(), cur_tensor_desc, dev_ptr, memory_type)); } return SUCCESS; } @@ -124,13 +122,12 @@ Status VarMemAssignUtil::SetOutVariableAttr(const ge::NodePtr &node, const ge::N GeTensorDesc var_tensor_desc = var_node->GetOpDesc()->GetOutputDesc(0); rtMemType_t memory_type 
= RT_MEMORY_HBM; GE_CHK_STATUS_RET( - VarManager::Instance(session_id)->GetVarAddr(var_node->GetName(), var_tensor_desc, &dev_ptr, memory_type)); + VarManager::Instance(session_id)->GetVarAddr(var_node->GetName(), var_tensor_desc, &dev_ptr, memory_type)); int out_list_size = static_cast(output_list.size()); GE_CHK_BOOL_RET_STATUS(index < out_list_size, FAILED, "index %d >= output_list.size() %d", index, out_list_size); output_list[index] = static_cast(reinterpret_cast(dev_ptr)); - GELOGI("Assign node outputOffset[index] is: %ld", output_list[index]); node->GetOpDesc()->SetOutputOffset(output_list); return SUCCESS; @@ -142,8 +139,7 @@ Status VarMemAssignUtil::DealExportVariableNode(const ge::NodePtr &node, const g GE_IF_BOOL_EXEC(var_out_anchor == nullptr, return FAILED); for (const ge::InDataAnchorPtr &dst_in_var_anchor : var_out_anchor->GetPeerInDataAnchors()) { ge::NodePtr dst_node = dst_in_var_anchor->GetOwnerNode(); - if ((dst_node->GetType() == ASSIGN) || (dst_node->GetType() == ASSIGNADD) || - (dst_node->GetType() == ASSIGNSUB)) { + if ((dst_node->GetType() == ASSIGN) || (dst_node->GetType() == ASSIGNADD) || (dst_node->GetType() == ASSIGNSUB)) { if (dst_in_var_anchor == dst_node->GetInDataAnchor(0)) { GE_CHK_STATUS_RET(DealExportVariableNode(dst_node, var_node, session_id)); } @@ -174,7 +170,7 @@ Status VarMemAssignUtil::DealBroadCastNode(uint32_t graph_id, const ge::NodePtr "Get broadcast op %s input tensor desc size [%zu] < idx [%d]", node->GetName().c_str(), input_tensor_desc_ptr_vistor.size(), broad_cast_info.idx); const ge::GeTensorDescPtr input_tensor_desc = - input_tensor_desc_ptr_vistor.at(static_cast(broad_cast_info.idx)); + input_tensor_desc_ptr_vistor.at(static_cast(broad_cast_info.idx)); uint32_t input_size = 0; GE_CHK_STATUS(TensorUtils::GetSize(*input_tensor_desc, input_size), "get input size failed."); broad_cast_info.input_size = input_size; @@ -193,7 +189,7 @@ Status VarMemAssignUtil::DealBroadCastNode(uint32_t graph_id, const ge::NodePtr "Get broadcast op %s output tensor desc size [%zu] < idx [%d]", node->GetName().c_str(), output_tensor_desc_ptr_vistor.size(), broad_cast_info.idx); const ge::GeTensorDescPtr output_tensor_desc = - output_tensor_desc_ptr_vistor.at(static_cast(broad_cast_info.idx)); + output_tensor_desc_ptr_vistor.at(static_cast(broad_cast_info.idx)); uint32_t output_size = 0; GE_CHK_STATUS(TensorUtils::GetSize(*output_tensor_desc, output_size), "get input size failed."); broad_cast_info.output_size = output_size; @@ -216,15 +212,14 @@ Status VarMemAssignUtil::DealVariableNode(uint32_t graph_id, const ge::NodePtr & continue; } - if ((dst_node->GetType() == ASSIGN) || (dst_node->GetType() == ASSIGNADD) || - (dst_node->GetType() == ASSIGNSUB)) { + if ((dst_node->GetType() == ASSIGN) || (dst_node->GetType() == ASSIGNADD) || (dst_node->GetType() == ASSIGNSUB)) { if (dst_in_data_anchor == dst_node->GetInDataAnchor(0)) { GE_CHK_STATUS_RET(DealExportVariableNode(dst_node, node, session_id)); } } auto dst_type = dst_node->GetType(); - bool is_trans_node = (dst_type == TRANSDATA) || (dst_type == CAST) || (dst_type == TRANSPOSE) || - (dst_type == PERMUTE); + bool is_trans_node = + (dst_type == TRANSDATA) || (dst_type == CAST) || (dst_type == TRANSPOSE) || (dst_type == PERMUTE); if (is_trans_node) { NodePtr final_trans_node = GetFinalTransNode(dst_node); GE_CHK_STATUS_RET(DealTransNode(final_trans_node)); @@ -241,8 +236,8 @@ ge::NodePtr VarMemAssignUtil::GetFinalTransNode(const ge::NodePtr &trans_node) { for (const auto &dst_in_anchor : 
trans_out_data_anchor->GetPeerInDataAnchors()) { NodePtr dst_node = dst_in_anchor->GetOwnerNode(); auto dst_type = dst_node->GetType(); - bool is_trans_node = (dst_type == TRANSDATA) || (dst_type == CAST) || (dst_type == TRANSPOSE) || - (dst_type == PERMUTE); + bool is_trans_node = + (dst_type == TRANSDATA) || (dst_type == CAST) || (dst_type == TRANSPOSE) || (dst_type == PERMUTE); if (is_trans_node && (dst_in_anchor->GetIdx() == 0)) { final_ref_node = GetFinalTransNode(dst_node); } @@ -256,8 +251,7 @@ Status VarMemAssignUtil::DealTransNode(const ge::NodePtr &final_trans_node) { GE_IF_BOOL_EXEC(final_trans_out_anchor == nullptr, return SUCCESS); for (const ge::InDataAnchorPtr &dst_in_var_anchor : final_trans_out_anchor->GetPeerInDataAnchors()) { ge::NodePtr dst_node = dst_in_var_anchor->GetOwnerNode(); - if ((dst_node->GetType() == ASSIGN) || (dst_node->GetType() == ASSIGNADD) || - (dst_node->GetType() == ASSIGNSUB)) { + if ((dst_node->GetType() == ASSIGN) || (dst_node->GetType() == ASSIGNADD) || (dst_node->GetType() == ASSIGNSUB)) { GE_CHK_STATUS_RET(DealExportTransNode(dst_node, final_trans_node)); } } @@ -269,8 +263,7 @@ Status VarMemAssignUtil::DealExportTransNode(const ge::NodePtr &node, const ge:: GE_CHECK_NOTNULL(node_out_anchor); for (const ge::InDataAnchorPtr &dst_in_var_anchor : node_out_anchor->GetPeerInDataAnchors()) { ge::NodePtr dst_node = dst_in_var_anchor->GetOwnerNode(); - if ((dst_node->GetType() == ASSIGN) || (dst_node->GetType() == ASSIGNADD) || - (dst_node->GetType() == ASSIGNSUB)) { + if ((dst_node->GetType() == ASSIGN) || (dst_node->GetType() == ASSIGNADD) || (dst_node->GetType() == ASSIGNSUB)) { GE_CHK_STATUS_RET(DealExportTransNode(dst_node, final_trans_node)); } } diff --git a/src/ge/graph/build/memory/var_mem_assign_util.h b/src/ge/graph/build/memory/var_mem_assign_util.h index d6ebe210..036fed06 100644 --- a/src/ge/graph/build/memory/var_mem_assign_util.h +++ b/src/ge/graph/build/memory/var_mem_assign_util.h @@ -16,9 +16,7 @@ #ifndef GE_GRAPH_BUILD_MEMORY_VAR_MEM_ASSIGN_UTIL_H_ #define GE_GRAPH_BUILD_MEMORY_VAR_MEM_ASSIGN_UTIL_H_ - #include - #include "common/debug/log.h" #include "common/ge_inner_error_codes.h" #include "graph/utils/node_utils.h" diff --git a/src/ge/graph/build/model_builder.cc b/src/ge/graph/build/model_builder.cc index 72563dba..fd4ffe8b 100644 --- a/src/ge/graph/build/model_builder.cc +++ b/src/ge/graph/build/model_builder.cc @@ -15,11 +15,8 @@ */ #include "graph/build/model_builder.h" - #include #include -#include - #include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" #include "graph/anchor.h" @@ -45,22 +42,31 @@ #include "omg/version.h" #include "register/op_registry.h" -using std::string; -using std::vector; -using std::map; -using std::set; -using domi::DOMI_TENSOR_NC1HWC0; using domi::AippOpParams; +using domi::DOMI_TENSOR_NC1HWC0; using domi::ModelTaskDef; using ge::FAILED; -using ge::SUCCESS; using ge::PARAM_INVALID; +using ge::SUCCESS; +using std::map; +using std::set; +using std::string; +using std::vector; namespace { const uint32_t kWeightsStartOffset = 512; const int32_t kWrongIndex = -2; + +const float kImgRatioYUV420SP_U8 = 1.5; +const int kImgRatioRGB888_U8 = 3; +const int kImgRatioNC1HWC0DI_FP16 = 12; const int kInvalidIndexNum = -1; +const uint32_t kInputDimensions2D = 2; +const uint32_t kInputDimensions3D = 3; + +const set adjust_layer_type_ = {ge::CONVOLUTION}; + bool IsGeLocalOp(const ge::ConstOpDescPtr &op_desc) { auto type = op_desc->GetType(); if (type == ge::CONSTANTOP) { @@ -68,10 +74,9 @@ bool 
IsGeLocalOp(const ge::ConstOpDescPtr &op_desc) { ge::GeTensorDesc output_desc = op_desc->GetOutputDesc(0); return !(output_desc.GetDataType() == ge::DT_STRING); } - const set ge_local_set = {ge::STREAMMERGE, ge::MEMCPYASYNC, ge::STREAMACTIVE, ge::STREAMSWITCH, - ge::VARIABLE, ge::NOOP, ge::CONSTANT, ge::ENTER, - ge::REFENTER, ge::LOOPCOND, ge::NEXTITERATION, ge::REFNEXTITERATION, - ge::EXIT, ge::REFEXIT}; + const set ge_local_set = { + ge::STREAMMERGE, ge::MEMCPYASYNC, ge::STREAMACTIVE, ge::STREAMSWITCH, ge::VARIABLE, ge::NOOP, ge::CONSTANT, + ge::ENTER, ge::REFENTER, ge::LOOPCOND, ge::NEXTITERATION, ge::REFNEXTITERATION, ge::EXIT, ge::REFEXIT}; return (ge_local_set.find(type) != ge_local_set.end()); } } // namespace @@ -252,12 +257,12 @@ Status ModelBuilder::AssignMemory() { void ModelBuilder::AddNodeInputProperty() { for (const ge::NodePtr &node : compute_graph_->GetDirectNode()) { auto node_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return); + GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return ); vector src_name_list; vector src_index_list; for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); - GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, GELOGW("peer_out_anchor is nullptr!"); continue); + GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); GE_IF_BOOL_EXEC(node_op_desc->HasAttr(MERGE_PRENODE_FLAG), continue); ge::NodePtr src_node = peer_out_anchor->GetOwnerNode(); @@ -279,10 +284,10 @@ void ModelBuilder::AddNodeInputProperty() { for (const ge::NodePtr &node : compute_graph_->GetDirectNode()) { auto node_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return); + GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return ); GE_IF_BOOL_EXEC(node_op_desc->GetType() == NETOUTPUT, continue); auto out_control_anchor = node->GetOutControlAnchor(); - GE_IF_BOOL_EXEC(out_control_anchor == nullptr, GELOGW("out_control_anchor is nullptr"); return); + GE_IF_BOOL_EXEC(out_control_anchor == nullptr, GELOGW("out_control_anchor is nullptr"); return ); vector dst_name_list; vector dst_index_list; string dst_name_temp; @@ -300,7 +305,7 @@ void ModelBuilder::AddNodeInputProperty() { dst_name_temp = ""; int64_t dst_index = kWrongIndex; // assign an impossible value to dst_index. for (const auto &in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { - GE_IF_BOOL_EXEC(in_data_anchor == nullptr, GELOGW("in_data_anchor is nullptr"); return); + GE_IF_BOOL_EXEC(in_data_anchor == nullptr, GELOGW("in_data_anchor is nullptr"); return ); ge::NodePtr dst_node = in_data_anchor->GetOwnerNode(); dst_name_temp = dst_name_temp.empty() ? 
dst_node->GetName() : dst_name_temp + ":" + dst_node->GetName(); dst_index = in_data_anchor->GetIdx(); @@ -378,20 +383,23 @@ void ModelBuilder::ClearOriginalFormat() { } } - GE_IF_BOOL_EXEC(node_op_desc->HasAttr(ATTR_NAME_INFERRED_FORMAT), - if (node_op_desc->DelAttr(ATTR_NAME_INFERRED_FORMAT) != SUCCESS) { - GELOGW("DelAttr ATTR_NAME_INFERRED_FORMAT failed."); - }); - - GE_IF_BOOL_EXEC(node_op_desc->HasAttr(ATTR_NAME_PRED_PERMUTE_DELETED), - if (node_op_desc->DelAttr(ATTR_NAME_PRED_PERMUTE_DELETED) != SUCCESS) { - GELOGW("DelAttr ATTR_NAME_PRED_PERMUTE_DELETED failed."); - }); - - GE_IF_BOOL_EXEC(node_op_desc->HasAttr(ATTR_NAME_IGNORE_PRED_FORMAT), - if (node_op_desc->DelAttr(ATTR_NAME_IGNORE_PRED_FORMAT) != SUCCESS) { - GELOGW("DelAttr ATTR_NAME_IGNORE_PRED_FORMAT failed."); - }); + GE_IF_BOOL_EXEC( + node_op_desc->HasAttr(ATTR_NAME_INFERRED_FORMAT), + if (node_op_desc->DelAttr(ATTR_NAME_INFERRED_FORMAT) != SUCCESS) { + GELOGW("DelAttr ATTR_NAME_INFERRED_FORMAT failed."); + }); + + GE_IF_BOOL_EXEC( + node_op_desc->HasAttr(ATTR_NAME_PRED_PERMUTE_DELETED), + if (node_op_desc->DelAttr(ATTR_NAME_PRED_PERMUTE_DELETED) != SUCCESS) { + GELOGW("DelAttr ATTR_NAME_PRED_PERMUTE_DELETED failed."); + }); + + GE_IF_BOOL_EXEC( + node_op_desc->HasAttr(ATTR_NAME_IGNORE_PRED_FORMAT), + if (node_op_desc->DelAttr(ATTR_NAME_IGNORE_PRED_FORMAT) != SUCCESS) { + GELOGW("DelAttr ATTR_NAME_IGNORE_PRED_FORMAT failed."); + }); } } } @@ -441,8 +449,8 @@ Status ModelBuilder::MergeWeights() { if (weight_data.data() != nullptr) { GE_IF_BOOL_EXEC(base_addr == nullptr, GELOGE(FAILED, "Base addr is nullptr."); return FAILED); GE_CHK_BOOL_EXEC( - memcpy_s(base_addr + offset, weight_offset_ - offset, weight_data.data(), weight_data.size()) == EOK, - return FAILED, "call memcpy_s failed."); + memcpy_s(base_addr + offset, weight_offset_ - offset, weight_data.data(), weight_data.size()) == EOK, + return FAILED, "call memcpy_s failed."); } weight_data.clear(); @@ -545,7 +553,6 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) { } ge::Buffer ModelBuilder::GetWeightBuffer() const { return weight_buffer_; } - Status ModelBuilder::CompileSingleOp() { GELOGD("Begin to compile single op."); // Create ge instance diff --git a/src/ge/graph/build/model_builder.h b/src/ge/graph/build/model_builder.h index 1121a31d..b6eee6aa 100644 --- a/src/ge/graph/build/model_builder.h +++ b/src/ge/graph/build/model_builder.h @@ -23,7 +23,6 @@ #include #include #include - #include "common/op/ge_op_utils.h" #include "common/tbe_kernel_store.h" #include "common/types.h" diff --git a/src/ge/graph/build/optimize_stream_graph.cc b/src/ge/graph/build/optimize_stream_graph.cc index 7798d1e6..f6cc5071 100644 --- a/src/ge/graph/build/optimize_stream_graph.cc +++ b/src/ge/graph/build/optimize_stream_graph.cc @@ -22,6 +22,7 @@ #include #include "common/util.h" #include "framework/common/debug/ge_log.h" + #include "graph/utils/node_utils.h" #include "graph/utils/tensor_utils.h" #include "init/gelib.h" @@ -43,10 +44,9 @@ void OptimizeStreamGraph::RefreshNodeId(const ComputeGraphPtr &comp_graph, vecto continue; } for (ge::NodePtr &node : sub_graph->GetAllNodes()) { - GE_CHECK_NOTNULL_EXEC(node->GetOpDesc(), return); + GE_CHECK_NOTNULL_EXEC(node->GetOpDesc(), return ); if ((node->GetType() == END) || (node->GetType() == PLACEHOLDER)) { node->GetOpDesc()->SetId(static_cast(node_size)); - GELOGI("Refresh node %s nodeId: %ld", node->GetName().c_str(), node->GetOpDesc()->GetId()); node_size++; } } @@ -69,7 +69,7 @@ bool 
OptimizeStreamGraph::IsSameStreamId(const ComputeGraphPtr &comp_graph) { stream_set.insert(stream_id); } if (stream_set.size() > 1) { - GELOGI("Nodes of graph: %s have different stream id, node num: %zu, different stream num: %zu.", + GELOGD("Nodes of graph: %s have different stream id, node num: %zu, different stream num: %zu.", comp_graph->GetName().c_str(), comp_graph->GetDirectNodesSize(), stream_set.size()); return false; } @@ -98,15 +98,13 @@ Status OptimizeStreamGraph::OptimizeStreamedSubGraph(const ComputeGraphPtr &comp vector graph_optimizers; if (instance->DNNEngineManagerObj().IsEngineRegistered(engine_name)) { instance->OpsKernelManagerObj().GetGraphOptimizerByEngine(engine_name, graph_optimizers); - GELOGI("Subgraph: %s start optimize streamed graph. engineName: %s, subgraph num: %zu, graph Optimizer num: %zu.", - sub_graph->GetName().c_str(), engine_name.c_str(), subgraph_infos.size(), graph_optimizers.size()); auto nodes = sub_graph->GetAllNodes(); if (nodes.empty()) { continue; } if (!IsSameStreamId(sub_graph)) { - GELOGI("There are more than one stream in subgraph %s", sub_graph->GetName().c_str()); + GELOGD("There are more than one stream in subgraph %s", sub_graph->GetName().c_str()); continue; } OpDescPtr op_desc = nodes.at(0)->GetOpDesc(); @@ -130,10 +128,10 @@ Status OptimizeStreamGraph::OptimizeStreamedSubGraph(const ComputeGraphPtr &comp sub_graph->GetName().c_str(), engine_name.c_str(), graph_optimizers.size(), ret); return ret; } - GELOGI( - "[optimizeStreamedSubGraph]: optimize streamed subgraph success, subgraph: %s, engine_name: %s, graph " - "Optimizer num: %zu!", - sub_graph->GetName().c_str(), engine_name.c_str(), graph_optimizers.size()); + GELOGD( + "[optimizeStreamedSubGraph]: optimize streamed subgraph success, subgraph: %s, engine_name: %s, graph " + "Optimizer num: %zu!", + sub_graph->GetName().c_str(), engine_name.c_str(), graph_optimizers.size()); } } } diff --git a/src/ge/graph/build/optimize_stream_graph.h b/src/ge/graph/build/optimize_stream_graph.h index 03f4006a..de1ca01c 100644 --- a/src/ge/graph/build/optimize_stream_graph.h +++ b/src/ge/graph/build/optimize_stream_graph.h @@ -18,7 +18,6 @@ #define GE_GRAPH_BUILD_OPTIMIZE_STREAM_GRAPH_H_ #include - #include "common/ge_inner_error_codes.h" #include "common/opskernel/ops_kernel_info_types.h" #include "framework/common/types.h" diff --git a/src/ge/graph/build/run_context.cc b/src/ge/graph/build/run_context.cc index a52d7dba..fa13c898 100644 --- a/src/ge/graph/build/run_context.cc +++ b/src/ge/graph/build/run_context.cc @@ -15,8 +15,8 @@ */ #include "graph/build/run_context.h" - #include "framework/common/debug/ge_log.h" + #include "common/util.h" #include "graph/debug/ge_attr_define.h" @@ -144,4 +144,5 @@ Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &gra } RunContext &RunContextUtil::GetRunContext() { return run_context_; } + } // namespace ge diff --git a/src/ge/graph/build/run_context.h b/src/ge/graph/build/run_context.h index 688ce83f..90579c77 100644 --- a/src/ge/graph/build/run_context.h +++ b/src/ge/graph/build/run_context.h @@ -18,7 +18,6 @@ #define GE_GRAPH_BUILD_RUN_CONTEXT_H_ #include - #include "common/ge_inner_error_codes.h" #include "common/opskernel/ops_kernel_info_types.h" #include "framework/common/types.h" @@ -37,8 +36,7 @@ class RunContextUtil { Status InitMemInfo(uint8_t *data_mem_base, uint64_t data_mem_size, uint8_t *weight_mem_base, uint64_t weight_mem_size); - Status CreateRunContext(Model &model_def, const ComputeGraphPtr &graph, Buffer 
&buffer, - const uint64_t session_id); + Status CreateRunContext(Model &model_def, const ComputeGraphPtr &graph, Buffer &buffer, const uint64_t session_id); RunContext &GetRunContext(); diff --git a/src/ge/graph/build/stream_allocator.cc b/src/ge/graph/build/stream_allocator.cc index 19816d69..5fbd3693 100644 --- a/src/ge/graph/build/stream_allocator.cc +++ b/src/ge/graph/build/stream_allocator.cc @@ -15,22 +15,21 @@ */ #include "graph/build/stream_allocator.h" - #include - #include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" #include "framework/common/fmk_error_codes.h" #include "framework/common/types.h" -#include "graph/build/logical_stream_allocator.h" #include "graph/debug/ge_attr_define.h" #include "graph/utils/graph_utils.h" #include "init/gelib.h" -using std::string; -using std::vector; +#include "graph/build/logical_stream_allocator.h" + using std::map; using std::set; +using std::string; +using std::vector; namespace { const int64_t kMaxNodeNumInNormalStream = 350; @@ -194,9 +193,9 @@ Status StreamAllocator::SplitStreams() { if (stream_node_num_vec[stream_id] > max_node_num_one_stream) { last_stream_id++; GELOGI( - "stream_node_num_vec[%ld]= %ld > max_node_num_one_stream : %ld, " - "It's time to split the stream, split newly-added stream id is %ld", - stream_id, stream_node_num_vec[stream_id], max_node_num_one_stream, last_stream_id); + "stream_node_num_vec[%ld]= %ld > max_node_num_one_stream : %ld, " + "It's time to split the stream, split newly-added stream id is %ld", + stream_id, stream_node_num_vec[stream_id], max_node_num_one_stream, last_stream_id); stream_node_num_vec[stream_id] = 1; added_stream_num_vec[stream_id]++; @@ -575,6 +574,10 @@ bool StreamAllocator::IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr pre_activate_stream_node->GetOpDesc()->GetId() >= send_node_ptr->GetOpDesc()->GetId()) { return true; } + auto in_nodes_of_pre = pre_activate_stream_node->GetInNodes(); + if (std::find(in_nodes_of_pre.begin(), in_nodes_of_pre.end(), send_node_ptr) != in_nodes_of_pre.end()) { + return true; + } } auto iterator = specific_activated_streams_nodes_map_.find(activate_stream_node->GetOpDesc()->GetStreamId()); if (iterator != specific_activated_streams_nodes_map_.end()) { @@ -916,9 +919,9 @@ Status StreamAllocator::InsertActiveEntryStream(const std::vector &act GELOGI("Create StreamActive op:%s.", op_desc->GetName().c_str()); GE_CHK_BOOL_EXEC( - AttrUtils::SetListStr(op_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector())), - GELOGE(FAILED, "SetListStr failed."); - return FAILED); + AttrUtils::SetListStr(op_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector())), + GELOGE(FAILED, "SetListStr failed."); + return FAILED); NodePtr active_node = whole_graph_->AddNodeFront(op_desc); GE_IF_BOOL_EXEC(active_node == nullptr, diff --git a/src/ge/graph/build/stream_allocator.h b/src/ge/graph/build/stream_allocator.h index 6804e52c..e3901205 100644 --- a/src/ge/graph/build/stream_allocator.h +++ b/src/ge/graph/build/stream_allocator.h @@ -22,7 +22,6 @@ #include #include #include - #include "engine_manager/dnnengine_manager.h" #include "framework/common/ge_inner_error_codes.h" #include "graph/compute_graph.h" diff --git a/src/ge/graph/build/task_generator.cc b/src/ge/graph/build/task_generator.cc index 0d81c548..2b9e30af 100644 --- a/src/ge/graph/build/task_generator.cc +++ b/src/ge/graph/build/task_generator.cc @@ -15,13 +15,12 @@ */ #include "graph/build/task_generator.h" - #include #include - -#include "common/types.h" 
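// [Editor's note: illustrative sketch, not part of the patch.] The stream_allocator.cc
// hunk above adds one new condition to StreamAllocator::IsRecvNodeActivatedBySendNode:
// the recv stream also counts as activated when the send node is a direct input of the
// pre-activate node. Condensed from the added lines (std::find needs <algorithm>):
auto in_nodes_of_pre = pre_activate_stream_node->GetInNodes();
if (std::find(in_nodes_of_pre.begin(), in_nodes_of_pre.end(), send_node_ptr) != in_nodes_of_pre.end()) {
  return true;  // send node feeds the pre-activate node directly
}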
#include "common/util.h" +#include "common/types.h" #include "framework/common/debug/ge_log.h" + #include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" #include "graph/manager/graph_var_manager.h" @@ -31,12 +30,12 @@ #include "graph/utils/type_utils.h" #include "init/gelib.h" +using domi::LogTimeStampDef; +using domi::ModelTaskDef; +using domi::TaskDef; +using std::map; using std::string; using std::vector; -using std::map; -using domi::TaskDef; -using domi::ModelTaskDef; -using domi::LogTimeStampDef; namespace { const char *const kIsFirstNode = "is_first_node"; @@ -122,13 +121,13 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t Status TaskGenerator::AddModelTaskToModel(const ModelTaskDef &model_task_def, uint64_t session_id, ge::Model &model, RunContext &run_context) { GE_CHK_BOOL_EXEC( - AttrUtils::SetInt(model, MODEL_ATTR_TASK_GEN_BASE_ADDR, reinterpret_cast(run_context.dataMemBase)), - GELOGE(FAILED, "SetInt MODEL_ATTR_TASK_GEN_BASE_ADDR failed."); - return FAILED); + AttrUtils::SetInt(model, MODEL_ATTR_TASK_GEN_BASE_ADDR, reinterpret_cast(run_context.dataMemBase)), + GELOGE(FAILED, "SetInt MODEL_ATTR_TASK_GEN_BASE_ADDR failed."); + return FAILED); GE_CHK_BOOL_EXEC( - AttrUtils::SetInt(model, MODEL_ATTR_TASK_GEN_WEIGHT_ADDR, reinterpret_cast(run_context.weightMemBase)), - GELOGE(FAILED, "SetInt MODEL_ATTR_TASK_GEN_WEIGHT_ADDR failed."); - return FAILED); + AttrUtils::SetInt(model, MODEL_ATTR_TASK_GEN_WEIGHT_ADDR, reinterpret_cast(run_context.weightMemBase)), + GELOGE(FAILED, "SetInt MODEL_ATTR_TASK_GEN_WEIGHT_ADDR failed."); + return FAILED); GE_CHK_BOOL_EXEC(AttrUtils::SetInt(model, ATTR_MODEL_TASK_GEN_VAR_ADDR, reinterpret_cast(var_mem_base_)), GELOGE(FAILED, "SetInt ATTR_MODEL_TASK_GEN_VAR_ADDR failed."); return FAILED); @@ -431,7 +430,7 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const return SUCCESS; } if (ppoint.fp_index == node_index) { - uint64_t jobid_log_id = ge::GetContext().JobId(); + uint64_t jobid_log_id = ge::GetContext().TraceId(); GELOGI("The first FP operator is %s, idx %u, job_id %lu", op_desc->GetName().c_str(), node_index, jobid_log_id); TaskDef job_task_def; diff --git a/src/ge/graph/build/task_generator.h b/src/ge/graph/build/task_generator.h index ad9c1388..ed89df47 100644 --- a/src/ge/graph/build/task_generator.h +++ b/src/ge/graph/build/task_generator.h @@ -21,7 +21,6 @@ #include #include #include - #include "common/ge_inner_error_codes.h" #include "common/opskernel/ops_kernel_info_types.h" #include "framework/common/types.h" diff --git a/src/ge/graph/common/omg_util.cc b/src/ge/graph/common/omg_util.cc index 334bcdc4..31c3f06f 100644 --- a/src/ge/graph/common/omg_util.cc +++ b/src/ge/graph/common/omg_util.cc @@ -18,6 +18,7 @@ #include +#include "common/op/attr_define.h" #include "framework/common/debug/ge_log.h" #include "graph/debug/ge_attr_define.h" #include "graph/utils/graph_utils.h" diff --git a/src/ge/graph/execute/graph_execute.cc b/src/ge/graph/execute/graph_execute.cc index d686791e..a1a30922 100644 --- a/src/ge/graph/execute/graph_execute.cc +++ b/src/ge/graph/execute/graph_execute.cc @@ -15,10 +15,7 @@ */ #include "graph/execute/graph_execute.h" - -#include #include - #include "common/ge_inner_error_codes.h" #include "common/model_parser/base.h" #include "graph/load/new_model_manager/model_manager.h" @@ -27,6 +24,10 @@ #include "runtime/mem.h" namespace ge { +namespace { +const char ENGINE_AI_CORE[] = "DNN_V100"; +} // namespace + GraphExecutor::GraphExecutor() : 
init_flag_(false), train_graph_flag_(false), @@ -472,7 +473,7 @@ Status GraphExecutor::GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vecto auto model_manager = ge::ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); Status ret = - model_manager->GetInputOutputDescInfoForZeroCopy(model_id, input_desc, output_desc, input_formats, out_formats); + model_manager->GetInputOutputDescInfoForZeroCopy(model_id, input_desc, output_desc, input_formats, out_formats); if (ret != SUCCESS) { GELOGE(ret, "GetInputOutputDescInfoForZeroCopy failed."); return ret; diff --git a/src/ge/graph/load/new_model_manager/data_dumper.cc b/src/ge/graph/load/new_model_manager/data_dumper.cc index e4f1a5af..d99e8452 100644 --- a/src/ge/graph/load/new_model_manager/data_dumper.cc +++ b/src/ge/graph/load/new_model_manager/data_dumper.cc @@ -15,19 +15,16 @@ */ #include "graph/load/new_model_manager/data_dumper.h" - -#include - -#include "common/properties_manager.h" -#include "framework/common/debug/ge_log.h" -#include "framework/common/util.h" -#include "graph/anchor.h" -#include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/model_utils.h" #include "graph/utils/attr_utils.h" -#include "proto/ge_ir.pb.h" +#include "graph/debug/ge_attr_define.h" +#include "framework/common/debug/ge_log.h" #include "proto/op_mapping_info.pb.h" +#include "proto/ge_ir.pb.h" #include "runtime/mem.h" +#include "common/properties_manager.h" +#include "framework/common/util.h" +#include "model_utils.h" +#include "graph/anchor.h" namespace { const uint32_t kAicpuLoadFlag = 1; @@ -36,32 +33,32 @@ const uint32_t kAicpuUnloadFlag = 0; static int32_t GetIrDataType(ge::DataType data_type) { static const std::map data_type_map = { - {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, - {ge::DT_FLOAT, ge::proto::DT_FLOAT}, - {ge::DT_FLOAT16, ge::proto::DT_FLOAT16}, - {ge::DT_INT8, ge::proto::DT_INT8}, - {ge::DT_UINT8, ge::proto::DT_UINT8}, - {ge::DT_INT16, ge::proto::DT_INT16}, - {ge::DT_UINT16, ge::proto::DT_UINT16}, - {ge::DT_INT32, ge::proto::DT_INT32}, - {ge::DT_INT64, ge::proto::DT_INT64}, - {ge::DT_UINT32, ge::proto::DT_UINT32}, - {ge::DT_UINT64, ge::proto::DT_UINT64}, - {ge::DT_BOOL, ge::proto::DT_BOOL}, - {ge::DT_DOUBLE, ge::proto::DT_DOUBLE}, - {ge::DT_DUAL, ge::proto::DT_DUAL}, - {ge::DT_DUAL_SUB_INT8, ge::proto::DT_DUAL_SUB_INT8}, - {ge::DT_DUAL_SUB_UINT8, ge::proto::DT_DUAL_SUB_UINT8}, - {ge::DT_COMPLEX64, ge::proto::DT_COMPLEX64}, - {ge::DT_COMPLEX128, ge::proto::DT_COMPLEX128}, - {ge::DT_QINT8, ge::proto::DT_QINT8}, - {ge::DT_QINT16, ge::proto::DT_QINT16}, - {ge::DT_QINT32, ge::proto::DT_QINT32}, - {ge::DT_QUINT8, ge::proto::DT_QUINT8}, - {ge::DT_QUINT16, ge::proto::DT_QUINT16}, - {ge::DT_RESOURCE, ge::proto::DT_RESOURCE}, - {ge::DT_STRING_REF, ge::proto::DT_STRING_REF}, - {ge::DT_STRING, ge::proto::DT_STRING}, + {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, + {ge::DT_FLOAT, ge::proto::DT_FLOAT}, + {ge::DT_FLOAT16, ge::proto::DT_FLOAT16}, + {ge::DT_INT8, ge::proto::DT_INT8}, + {ge::DT_UINT8, ge::proto::DT_UINT8}, + {ge::DT_INT16, ge::proto::DT_INT16}, + {ge::DT_UINT16, ge::proto::DT_UINT16}, + {ge::DT_INT32, ge::proto::DT_INT32}, + {ge::DT_INT64, ge::proto::DT_INT64}, + {ge::DT_UINT32, ge::proto::DT_UINT32}, + {ge::DT_UINT64, ge::proto::DT_UINT64}, + {ge::DT_BOOL, ge::proto::DT_BOOL}, + {ge::DT_DOUBLE, ge::proto::DT_DOUBLE}, + {ge::DT_DUAL, ge::proto::DT_DUAL}, + {ge::DT_DUAL_SUB_INT8, ge::proto::DT_DUAL_SUB_INT8}, + {ge::DT_DUAL_SUB_UINT8, ge::proto::DT_DUAL_SUB_UINT8}, + {ge::DT_COMPLEX64, 
ge::proto::DT_COMPLEX64}, + {ge::DT_COMPLEX128, ge::proto::DT_COMPLEX128}, + {ge::DT_QINT8, ge::proto::DT_QINT8}, + {ge::DT_QINT16, ge::proto::DT_QINT16}, + {ge::DT_QINT32, ge::proto::DT_QINT32}, + {ge::DT_QUINT8, ge::proto::DT_QUINT8}, + {ge::DT_QUINT16, ge::proto::DT_QUINT16}, + {ge::DT_RESOURCE, ge::proto::DT_RESOURCE}, + {ge::DT_STRING_REF, ge::proto::DT_STRING_REF}, + {ge::DT_STRING, ge::proto::DT_STRING}, }; auto iter = data_type_map.find(data_type); @@ -93,6 +90,12 @@ void DataDumper::ReleaseDevMem(void **ptr) noexcept { } } +void DataDumper::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond) { + global_step_ = reinterpret_cast(global_step); + loop_per_iter_ = reinterpret_cast(loop_per_iter); + loop_cond_ = reinterpret_cast(loop_cond); +} + void DataDumper::SaveDumpInput(const std::shared_ptr &node) { if (node != nullptr) { auto input_op_desc = node->GetOpDesc(); @@ -110,7 +113,7 @@ void DataDumper::SaveDumpInput(const std::shared_ptr &node) { return; } input_map_.insert( - {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}}); + {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}}); } } } @@ -148,6 +151,30 @@ void DataDumper::SaveDumpTask(uint32_t task_id, const std::shared_ptr &o } } +static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uintptr_t loop_cond, + aicpu::dump::OpMappingInfo &op_mapping_info) { + if (step_id != 0) { + GELOGI("step_id exist."); + op_mapping_info.set_step_id_addr(static_cast(step_id)); + } else { + GELOGI("step_id is null."); + } + + if (loop_per_iter != 0) { + GELOGI("loop_per_iter exist."); + op_mapping_info.set_iterations_per_loop_addr(static_cast(loop_per_iter)); + } else { + GELOGI("loop_per_iter is null."); + } + + if (loop_cond != 0) { + GELOGI("loop_cond exist."); + op_mapping_info.set_loop_cond_addr(static_cast(loop_cond)); + } else { + GELOGI("loop_cond is null."); + } +} + Status DataDumper::LoadDumpInfo() { GELOGI("%zu op need dump in %s.", op_list_.size(), model_name_.c_str()); if (op_list_.empty()) { @@ -161,6 +188,7 @@ Status DataDumper::LoadDumpInfo() { op_mapping_info.set_model_name(model_name_); op_mapping_info.set_model_id(model_id_); op_mapping_info.set_flag(kAicpuLoadFlag); + SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); for (const auto &op_iter : op_list_) { aicpu::dump::Task task; diff --git a/src/ge/graph/load/new_model_manager/data_dumper.h b/src/ge/graph/load/new_model_manager/data_dumper.h index 6a3120b4..ea25da31 100644 --- a/src/ge/graph/load/new_model_manager/data_dumper.h +++ b/src/ge/graph/load/new_model_manager/data_dumper.h @@ -17,10 +17,8 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DATA_DUMPER_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DATA_DUMPER_H_ -#include -#include #include -#include +#include #include "framework/common/ge_inner_error_codes.h" #include "graph/node.h" @@ -38,7 +36,10 @@ class DataDumper { op_list_(), input_map_(), load_flag_(false), - device_id_(0) {} + device_id_(0), + global_step_(0), + loop_per_iter_(0), + loop_cond_(0) {} ~DataDumper(); @@ -46,6 +47,7 @@ class DataDumper { void SetModelId(uint32_t model_id) { model_id_ = model_id; } void SetMemory(const RuntimeParam &runtime_param) { runtime_param_ = runtime_param; } void SetDeviceId(uint32_t device_id) { device_id_ = device_id; } + void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond); void SaveDumpInput(const std::shared_ptr &node); // args is device memory stored 
first output addr @@ -64,26 +66,31 @@ class DataDumper { struct InnerDumpInfo; struct InnerInputMapping; + std::vector op_list_; std::multimap input_map_; bool load_flag_; uint32_t device_id_; + uintptr_t global_step_; + uintptr_t loop_per_iter_; + uintptr_t loop_cond_; +}; - struct InnerDumpInfo { - uint32_t task_id; - std::shared_ptr op; - uintptr_t args; - bool is_task; - int input_anchor_index; - int output_anchor_index; - }; +struct DataDumper::InnerDumpInfo { + uint32_t task_id; + std::shared_ptr op; + uintptr_t args; + bool is_task; + int input_anchor_index; + int output_anchor_index; +}; - struct InnerInputMapping { - std::shared_ptr data_op; - int input_anchor_index; - int output_anchor_index; - }; +struct DataDumper::InnerInputMapping { + std::shared_ptr data_op; + int input_anchor_index; + int output_anchor_index; }; + } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DATA_DUMPER_H_ diff --git a/src/ge/graph/load/new_model_manager/davinci_model.cc b/src/ge/graph/load/new_model_manager/davinci_model.cc index 4887e7d1..95d28ef4 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.cc +++ b/src/ge/graph/load/new_model_manager/davinci_model.cc @@ -16,19 +16,14 @@ #include "graph/load/new_model_manager/davinci_model.h" +#include #include +#include #include #include #include #include - -#include #include -#include - -#include "cce/cce.h" -#include "cce/dnn.h" -#include "cce/optimizer/fusion_engine.h" #include "common/debug/log.h" #include "common/formats/formats.h" #include "common/formats/utils/formats_trans_utils.h" @@ -43,15 +38,14 @@ #include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" #include "graph/graph.h" -#include "graph/load/new_model_manager/tbe_handle_store.h" #include "graph/load/output/output.h" +#include "graph/load/new_model_manager/tbe_handle_store.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/util/debug.h" #include "graph/model_serialize.h" #include "graph/node.h" #include "graph/utils/graph_utils.h" -#include "graph/utils/node_utils.h" #include "graph/utils/type_utils.h" #include "init/gelib.h" #include "mmpa/mmpa_api.h" @@ -380,7 +374,8 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptrGetType() == ANN_DATA_TYPE) { data_op_list_.push_back(op_desc); GE_IF_BOOL_EXEC( - (op_desc->GetInputDescPtr(0) != nullptr && op_desc->GetInputDescPtr(0)->GetFormat() != FORMAT_FILTER_HWCK), - data_op_input_tensor_desc_map_[op_desc->GetName()] = op_desc->GetInputDescPtr(0)); + (op_desc->GetInputDescPtr(0) != nullptr && op_desc->GetInputDescPtr(0)->GetFormat() != FORMAT_FILTER_HWCK), + data_op_input_tensor_desc_map_[op_desc->GetName()] = op_desc->GetInputDescPtr(0)); GE_IF_BOOL_EXEC( - (op_desc->GetOutputDescPtr(0) != nullptr && op_desc->GetOutputDescPtr(0)->GetFormat() != FORMAT_FRACTAL_Z), - data_op_output_tensor_desc_map_[op_desc->GetName()] = op_desc->GetOutputDescPtr(0)); + (op_desc->GetOutputDescPtr(0) != nullptr && op_desc->GetOutputDescPtr(0)->GetFormat() != FORMAT_FRACTAL_Z), + data_op_output_tensor_desc_map_[op_desc->GetName()] = op_desc->GetOutputDescPtr(0)); SetOutsideAddr(ModelUtils::GetOutputDataAddrs(runtime_param_, op_desc)); data_dumper_.SaveDumpInput(node); } @@ -744,6 +738,8 @@ Status DavinciModel::Init(void *dev_ptr, size_t memsize, void *weight_ptr, size_ GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc"); GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle"); + 
SetDataDumperArgs(); + GE_TIMESTAMP_START(DoTaskSink); auto ret = DoTaskSink(); GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink"); @@ -901,7 +897,8 @@ Status DavinciModel::GetInputDescInfo(vector &input_desc, s h = format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H; w = format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W; - if (data_op_list_[index]->GetInputDescPtr(0)->GetShape().GetDimNum() == static_cast(NORMAL_TENSOR_SIZE)) { + if (data_op_list_[index]->GetInputDescPtr(0)->GetShape().GetDimNum() == + static_cast(domi::NORMAL_TENSOR_SIZE)) { input.shape_info.num = data_op_list_[index]->GetInputDescPtr(0)->GetShape().GetDim(n); input.shape_info.height = data_op_list_[index]->GetInputDescPtr(0)->GetShape().GetDim(h); input.shape_info.width = data_op_list_[index]->GetInputDescPtr(0)->GetShape().GetDim(w); @@ -927,7 +924,7 @@ void DavinciModel::CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputD uint32_t &format_result) { /// netoutput input tensor desc GE_IF_BOOL_EXEC(op_desc->GetInputDescPtr(index) == nullptr, GELOGE(FAILED, "OpDesc GetInputDescPtr is nullptr"); - return); + return ); Format format = op_desc->GetInputDescPtr(index)->GetFormat(); GeShape shape = op_desc->GetInputDescPtr(index)->GetShape(); DataType data_type = op_desc->GetInputDescPtr(index)->GetDataType(); @@ -996,8 +993,8 @@ Status DavinciModel::GetOutputDescInfo(vector &output_desc, std::vector src_index = op_desc->GetSrcIndex(); GE_CHK_BOOL_RET_STATUS(src_name.size() > index && src_index.size() > index, INTERNAL_ERROR, "construct output_name failed."); - output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + - std::to_string(src_index[index]); + output_name = + std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + std::to_string(src_index[index]); output.name = output_name; output_desc.push_back(output); @@ -1031,12 +1028,26 @@ Status DavinciModel::CopyInputData(const InputData ¤t_data, bool device_da } Status DavinciModel::SyncVarData() { - GELOGI("SyncBroadCastData2Var model id:%u", model_id_); + GELOGI("Sync var data, model id:%u", model_id_); Status ret = SUCCESS; + OpDescPtr global_step = GetVariableOp(NODE_NAME_GLOBAL_STEP); + if (global_step != nullptr) { + auto v_output_size = ModelUtils::GetOutputSize(global_step); + auto v_output_addr = ModelUtils::GetOutputDataAddrs(runtime_param_, global_step); + if (v_output_size.empty() || v_output_addr.empty()) { + GELOGE(PARAM_INVALID, "global step op:%s not set output", global_step->GetName().c_str()); + return PARAM_INVALID; + } + std::vector v_step; + v_step.push_back(iterator_count_); + GE_CHK_RT_RET(rtMemcpy(v_output_addr[0], v_output_size[0], v_step.data(), v_step.size() * sizeof(uint64_t), + RT_MEMCPY_HOST_TO_DEVICE)); + } + for (auto op_desc : variable_op_list_) { ret = - VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); + VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret fail, model id:%u, op name:%s", model_id_, op_desc->GetName().c_str()); } @@ -1132,7 +1143,7 @@ Status DavinciModel::CopyTransData(const std::vector &data, uint32_t TypeUtils::DataTypeToSerialString(dst_data_type).c_str(), formats::ShapeToString(input_shape).c_str(), src_data_size); auto ret = - formats::TransDataType({src_data, static_cast(src_data_size), src_data_type, dst_data_type}, tmp_result); + formats::TransDataType({src_data, 
static_cast(src_data_size), src_data_type, dst_data_type}, tmp_result); if (ret != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to trans data type from %s to %s, input shape %s, data size %zu, error code %d", TypeUtils::DataTypeToSerialString(src_data_type).c_str(), @@ -1319,7 +1330,7 @@ Status DavinciModel::ReturnNoOutput(uint32_t model_id, uint32_t data_id) { GELOGI("ReturnNoOutput model id:%u", model_id); for (const auto &op_desc : variable_op_list_) { Status ret = VarManager::Instance(session_id_) - ->SyncBroadCastData2Var(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); + ->SyncBroadCastData2Var(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret fail, model id:%u, op name:%s", model_id, op_desc->GetName().c_str()); } @@ -1426,7 +1437,6 @@ void *DavinciModel::Run(DavinciModel *model) { CsaInteract::GetInstance().WriteErrorCode(FAILED, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); return nullptr, "model_pointer is null!") bool seq_end_flag = false; - uint32_t interator_count = 0; uint32_t model_id = model->Id(); uint32_t device_id = model->GetDeviceId(); @@ -1463,19 +1473,19 @@ void *DavinciModel::Run(DavinciModel *model) { GE_TIMESTAMP_START(Model_SyncVarData); ret = model->SyncVarData(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - ret != SUCCESS, - (void)model->ReturnResult(model->model_id_, current_data.index, false, false, data_wrapper->GetOutput()); - CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); - continue, "Copy input data to model failed."); // [No need to check value] + ret != SUCCESS, + (void)model->ReturnResult(model->model_id_, current_data.index, false, false, data_wrapper->GetOutput()); + CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); + continue, "Copy input data to model failed."); // [No need to check value] GE_TIMESTAMP_END(Model_SyncVarData, "Model Run SyncVarData"); GELOGI("Copy input data, model id:%u", model_id); ret = model->CopyInputData(current_data, false); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - ret != SUCCESS, - (void)model->ReturnResult(model->model_id_, current_data.index, false, false, data_wrapper->GetOutput()); - CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); - continue, "Copy input data to model failed."); // [No need to check value] + ret != SUCCESS, + (void)model->ReturnResult(model->model_id_, current_data.index, false, false, data_wrapper->GetOutput()); + CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); + continue, "Copy input data to model failed."); // [No need to check value] if (ProfilingManager::Instance().ProfilingOpTraceOn()) { GELOGI("GetOpTraceIterNum:%d", ProfilingManager::Instance().GetOpTraceIterNum()); @@ -1486,44 +1496,43 @@ void *DavinciModel::Run(DavinciModel *model) { // collect profiling for ge ProfilingManager::Instance().ReportProfilingData(model->GetTaskIdOpName()); GELOGI("rtModelExecute start."); - rtError_t rt_ret_prof_on = rtModelExecute(model->rt_model_handle_, model->rt_model_stream_, 0); - GE_IF_BOOL_EXEC(rt_ret_prof_on != RT_ERROR_NONE, rslt_flg = false; (void)model->ReturnResult( - model->model_id_, current_data.index, false, false, data_wrapper->GetOutput()); + rtError_t rt_ret = rtModelExecute(model->rt_model_handle_, model->rt_model_stream_, 0); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, rslt_flg = false; (void)model->ReturnResult( + model->model_id_, 
current_data.index, false, false, data_wrapper->GetOutput()); continue); // [No need to check value] GELOGI("rtModelExecute end"); GELOGI("rtStreamSynchronize start."); - rt_ret_prof_on = rtStreamSynchronize(model->rt_model_stream_); - GE_IF_BOOL_EXEC(rt_ret_prof_on != RT_ERROR_NONE, rslt_flg = false; (void)model->ReturnResult( - model->model_id_, current_data.index, false, seq_end_flag, data_wrapper->GetOutput()); + rt_ret = rtStreamSynchronize(model->rt_model_stream_); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, rslt_flg = false; (void)model->ReturnResult( + model->model_id_, current_data.index, false, seq_end_flag, data_wrapper->GetOutput()); continue); // [No need to check value] GELOGI("rtStreamSynchronize end."); - ProfilingManager::Instance().StopProfiling(); // just profiling, no need to check value + (void)ProfilingManager::Instance().StopProfiling(); // just profiling, no need to check value } } else { GE_TIMESTAMP_START(rtModelExecute); GELOGI("rtModelExecute start."); - rtError_t rt_ret_prof_off = rtModelExecute(model->rt_model_handle_, model->rt_model_stream_, 0); + rtError_t rt_ret = rtModelExecute(model->rt_model_handle_, model->rt_model_stream_, 0); GE_IF_BOOL_EXEC( - rt_ret_prof_off != RT_ERROR_NONE, rslt_flg = false; - (void)model->ReturnResult(model->model_id_, current_data.index, false, false, data_wrapper->GetOutput()); - CsaInteract::GetInstance().WriteErrorCode(rt_ret_prof_off, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); - continue); + rt_ret != RT_ERROR_NONE, rslt_flg = false; + (void)model->ReturnResult(model->model_id_, current_data.index, false, false, data_wrapper->GetOutput()); + CsaInteract::GetInstance().WriteErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); continue); GELOGI("rtModelExecute end"); GE_TIMESTAMP_END(rtModelExecute, "GraphExcute::rtModelExecute"); GE_TIMESTAMP_START(rtStreamSynchronize); GELOGI("rtStreamSynchronize start."); - rt_ret_prof_off = rtStreamSynchronize(model->rt_model_stream_); - if (rt_ret_prof_off == RT_ERROR_END_OF_SEQUENCE) { + rt_ret = rtStreamSynchronize(model->rt_model_stream_); + if (rt_ret == RT_ERROR_END_OF_SEQUENCE) { seq_end_flag = true; } - GE_IF_BOOL_EXEC(rt_ret_prof_off != RT_ERROR_NONE, rslt_flg = false; GELOGI("seq_end_flg: %d", seq_end_flag); - (void)model->ReturnResult(model->model_id_, current_data.index, false, seq_end_flag, - data_wrapper->GetOutput()); // [No need to check value] - CsaInteract::GetInstance().StoreInternalErrorCode(rt_ret_prof_off, ERROR_MODULE_RUNTIME, - JOBSUBSTATE_GRAPH_EXEC); - continue); + GE_IF_BOOL_EXEC( + rt_ret != RT_ERROR_NONE, rslt_flg = false; GELOGI("seq_end_flg: %d", seq_end_flag); + (void)model->ReturnResult(model->model_id_, current_data.index, false, seq_end_flag, + data_wrapper->GetOutput()); // [No need to check value] + CsaInteract::GetInstance().StoreInternalErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); + continue); GELOGI("rtStreamSynchronize end."); GE_TIMESTAMP_END(rtStreamSynchronize, "GraphExcute::Wait for rtStreamSynchronize"); @@ -1536,14 +1545,14 @@ void *DavinciModel::Run(DavinciModel *model) { GE_TIMESTAMP_START(ReturnResult3); // copy output data from device to host GE_IF_BOOL_EXEC( - !model->output_op_list_.empty(), - (void)model->ReturnResult(model->model_id_, current_data.index, rslt_flg, false, data_wrapper->GetOutput())) + !model->output_op_list_.empty(), + (void)model->ReturnResult(model->model_id_, current_data.index, rslt_flg, false, data_wrapper->GetOutput())) // copy output data from device to host for variable graph 
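// [Editor's note: illustrative sketch, not part of the patch.] The DavinciModel::Run
// hunks above replace the two locals rt_ret_prof_on / rt_ret_prof_off with a single
// rt_ret per branch; the execute-then-synchronize sequence they guard is, condensed:
rtError_t rt_ret = rtModelExecute(model->rt_model_handle_, model->rt_model_stream_, 0);
if (rt_ret == RT_ERROR_NONE) {
  rt_ret = rtStreamSynchronize(model->rt_model_stream_);
  if (rt_ret == RT_ERROR_END_OF_SEQUENCE) {
    seq_end_flag = true;  // end-of-sequence is passed to ReturnResult, not treated as a plain failure
  }
}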
GE_IF_BOOL_EXEC(model->output_op_list_.empty(), (void)model->ReturnNoOutput(model->model_id_, current_data.index)); GE_TIMESTAMP_END(ReturnResult3, "GraphExcute::CopyDataFromDeviceToHost"); - interator_count++; - GELOGI("interator_count=%u", interator_count); + model->iterator_count_++; + GELOGI("run iterator count is %lu", model->iterator_count_); } CsaInteract::GetInstance().WriteInternalErrorCode(); @@ -1645,7 +1654,7 @@ void DavinciModel::UnbindTaskSinkStream() { return; } -Status DavinciModel::InitTaskInfo(ModelTaskDef &model_task_def) { +Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) { GELOGI("InitTaskInfo in,task size %zu", model_task_def.task().size()); task_list_.resize(model_task_def.task_size()); std::vector> futures(model_task_def.task_size()); @@ -1659,23 +1668,28 @@ Status DavinciModel::InitTaskInfo(ModelTaskDef &model_task_def) { } for (int32_t i = 0; i < model_task_def.task_size(); ++i) { - futures[i] = executor.commit( - [](const domi::TaskDef &task, DavinciModel *model, rtContext_t ctx, int32_t idx) -> Status { - rtError_t ctx_ret = rtCtxSetCurrent(ctx); - if (ctx_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Failed to set context from rt, error-code 0x%X.", ctx_ret); - return RT_FAILED; - } + std::future f = executor.commit( + [](const domi::TaskDef &task, DavinciModel *model, rtContext_t ctx, int32_t idx) -> Status { + rtError_t rt_ret = rtCtxSetCurrent(ctx); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Failed to set context from rt, error-code 0x%X.", rt_ret); + return RT_FAILED; + } - model->task_list_[idx] = TaskInfoFactory::Instance().Create(static_cast(task.type())); + model->task_list_[idx] = TaskInfoFactory::Instance().Create(static_cast(task.type())); - Status ret = FAILED; - if (model->task_list_[idx] != nullptr) { - ret = model->task_list_[idx]->Init(task, model); - } - return ret; - }, - model_task_def.task(i), this, ctx, i); + Status ret = FAILED; + if (model->task_list_[idx] != nullptr) { + ret = model->task_list_[idx]->Init(task, model); + } + return ret; + }, + model_task_def.task(i), this, ctx, i); + if (!f.valid()) { + GELOGE(FAILED, "Future is invalid"); + return FAILED; + } + futures[i] = std::move(f); } Status ret; @@ -1845,7 +1859,7 @@ Status DavinciModel::ZeroCopyInput(const InputData &input_data) { GELOGE(INTERNAL_ERROR, "data_buf.data is nullptr"); return INTERNAL_ERROR; } - if (!outputs.empty() && ZeroCopyImpl(outputs[0], data_buf) != SUCCESS) { + if (ZeroCopyImpl(outputs[0], data_buf) != SUCCESS) { return FAILED; } } @@ -1986,10 +2000,10 @@ Status DavinciModel::InitConstant(const ConstOpDescPtr &op_desc) const { return PARAM_INVALID;); GeTensor *tensor = const_cast(v_weights[0].get()); - GE_IF_BOOL_EXEC(v_output_size[0] < tensor->GetData().size(), - GELOGE(PARAM_INVALID, "output size:%u less than weight data size:%zu", v_output_size[0], - tensor->GetData().size()); - return PARAM_INVALID;); + GE_IF_BOOL_EXEC( + v_output_size[0] < tensor->GetData().size(), + GELOGE(PARAM_INVALID, "output size:%u less than weight data size:%zu", v_output_size[0], tensor->GetData().size()); + return PARAM_INVALID;); GE_IF_BOOL_EXEC(tensor->GetData().size() == 0, GELOGW("const op:%s has no weight data.", op_desc->GetName().c_str()); return SUCCESS;); @@ -2125,25 +2139,27 @@ Status DavinciModel::MarkActiveStream(const OpDescPtr &op_desc) { GE_CHECK_NOTNULL(op_desc); std::string type = op_desc->GetType(); GE_IF_BOOL_EXEC( - type == STREAMSWITCH, std::vector active_stream_list; - GE_LOGI_IF(!ge::AttrUtils::GetListInt(op_desc, 
ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_list), - "GetInt ACTIVE_STREAM_LIST fail."); - if (active_stream_list.size() != TRUE_BRANCH_STREAM_NUM) { - GELOGE(INTERNAL_ERROR, "Stream num of switch true branch must be %u.", TRUE_BRANCH_STREAM_NUM); - return INTERNAL_ERROR; - } uint32_t true_stream_id = active_stream_list.front(); - active_stream_indication_.insert(true_stream_id); - GELOGI("flowctrl_op_index_map node:%s, true_stream_id=%u.", op_desc->GetName().c_str(), true_stream_id);); - GE_IF_BOOL_EXEC(type == STREAMACTIVE, if (op_desc->HasAttr(ATTR_NAME_SWITCH_BRANCH_NODE_LABEL)) { - std::vector<uint32_t> active_stream_list; - GE_CHK_BOOL_EXEC(AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_list), - return INTERNAL_ERROR, "StreamActiveOp get attr ACTIVE_STREAM fail."); - - for (size_t j = 0; j < active_stream_list.size(); ++j) { - active_stream_indication_.insert(active_stream_list[j]); - GELOGI("flowctrl_op_index_map node:%s, active_stream_id=%u.", op_desc->GetName().c_str(), active_stream_list[j]); - } - }); + type == STREAMSWITCH, std::vector<uint32_t> active_stream_list; + GE_LOGI_IF(!ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_list), + "GetInt ACTIVE_STREAM_LIST fail."); + if (active_stream_list.size() != TRUE_BRANCH_STREAM_NUM) { + GELOGE(INTERNAL_ERROR, "Stream num of switch true branch must be %u.", TRUE_BRANCH_STREAM_NUM); + return INTERNAL_ERROR; + } uint32_t true_stream_id = active_stream_list.front(); + active_stream_indication_.insert(true_stream_id); + GELOGI("flowctrl_op_index_map node:%s, true_stream_id=%u.", op_desc->GetName().c_str(), true_stream_id);); + GE_IF_BOOL_EXEC( + type == STREAMACTIVE, if (op_desc->HasAttr(ATTR_NAME_SWITCH_BRANCH_NODE_LABEL)) { + std::vector<uint32_t> active_stream_list; + GE_CHK_BOOL_EXEC(AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_list), + return INTERNAL_ERROR, "StreamActiveOp get attr ACTIVE_STREAM fail."); + + for (size_t j = 0; j < active_stream_list.size(); ++j) { + active_stream_indication_.insert(active_stream_list[j]); + GELOGI("flowctrl_op_index_map node:%s, active_stream_id=%u.", op_desc->GetName().c_str(), + active_stream_list[j]); + } + }); return SUCCESS; } @@ -2264,7 +2280,7 @@ uint8_t *DavinciModel::MallocWeightsMem(uint32_t weights_size) { if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { string weight_memory_key = std::to_string(0) + "_w"; weights_mem_base = - MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(weight_memory_key, weights_size, GetDeviceId()); + MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(weight_memory_key, weights_size, GetDeviceId()); } else { weights_mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(weights_size, GetDeviceId()); } @@ -2334,46 +2350,50 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) if (node->GetType() != VARIABLE) { continue; } - vector_future.push_back(executor.commit( - [](ge::NodePtr &node, DavinciModel *model, rtContext_t ctx, uint32_t graph_id) -> Status { - if (model == nullptr) { - GELOGE(FAILED, "DavinciModel is NULL!"); - return FAILED; - } - rtError_t rt_ret = rtCtxSetCurrent(ctx); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Failed to set context, error_code is: 0x%X.", rt_ret); - return RT_FAILED; + std::future<Status> f = executor.commit( + [](ge::NodePtr &node, DavinciModel *model, rtContext_t ctx, uint32_t graph_id) -> Status { + if (model == nullptr) { + GELOGE(FAILED, "DavinciModel is NULL!"); + return FAILED; + } + rtError_t rt_ret = rtCtxSetCurrent(ctx);
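Stepping back from the hunk for a moment: the pattern introduced here, and in InitTaskInfo above, is the same in both places. Commit a Status-returning lambda to the thread pool, reject an invalid future before storing it, and join every future afterwards. A self-contained sketch of that pattern, with std::async standing in for ThreadPool::commit (which, unlike std::async, can hand back an invalid future when enqueueing fails); Status, SKETCH_SUCCESS, and SKETCH_FAILED below are assumptions for the sketch, not GE definitions:

#include <future>
#include <vector>

using Status = int;             // assumption: GE's Status is an integral code
const Status SKETCH_SUCCESS = 0;
const Status SKETCH_FAILED = -1;

Status RunAll(const std::vector<int> &tasks) {
  std::vector<std::future<Status>> futures;
  futures.reserve(tasks.size());
  for (int t : tasks) {
    std::future<Status> f = std::async(std::launch::async, [t]() -> Status {
      return (t >= 0) ? SKETCH_SUCCESS : SKETCH_FAILED;  // placeholder work
    });
    if (!f.valid()) {  // the patch checks this before futures[i] = std::move(f)
      return SKETCH_FAILED;
    }
    futures.push_back(std::move(f));
  }
  for (auto &f : futures) {
    Status s = f.get();  // join in order; first failure wins
    if (s != SKETCH_SUCCESS) {
      return s;  // remaining std::async futures are joined by their destructors
    }
  }
  return SKETCH_SUCCESS;
}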
+ if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Failed to set context, error_code is: 0x%X.", rt_ret); + return RT_FAILED; + } + uint32_t allocated_graph_id = 0; + Status ret = VarManager::Instance(model->session_id_)->GetAllocatedGraphId(node->GetName(), allocated_graph_id); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "var has not been allocated, node:%s, graph_id:%u.", node->GetName().c_str(), + graph_id); + return INTERNAL_ERROR; + } + uint32_t changed_graph_id = 0; + ret = VarManager::Instance(model->session_id_)->GetChangedGraphId(node->GetName(), changed_graph_id); + bool call_trans_var = + (ret == SUCCESS && changed_graph_id == graph_id && changed_graph_id != allocated_graph_id); + if (call_trans_var) { + GELOGI("VarManager::GetChangedGraphId() success, node:%s, graph_id:%u.", node->GetName().c_str(), graph_id); + VarTransRoad *trans_road = VarManager::Instance(model->session_id_)->GetTransRoad(node->GetName()); + if (trans_road == nullptr) { + GELOGI("The variable %s does not have any trans road", node->GetName().c_str()); + return SUCCESS; } - uint32_t allocated_graph_id = 0; - Status ret = - VarManager::Instance(model->session_id_)->GetAllocatedGraphId(node->GetName(), allocated_graph_id); + ret = TransVarData(node, *trans_road, model->session_id_, model->device_id_); if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "var has not been allocated, node:%s, graph_id:%u.", node->GetName().c_str(), - graph_id); + GELOGE(INTERNAL_ERROR, "TransVarData failed, node:%s, graph_id:%u.", node->GetName().c_str(), graph_id); return INTERNAL_ERROR; } - uint32_t changed_graph_id = 0; - ret = VarManager::Instance(model->session_id_)->GetChangedGraphId(node->GetName(), changed_graph_id); - bool call_trans_var = - (ret == SUCCESS && changed_graph_id == graph_id && changed_graph_id != allocated_graph_id); - if (call_trans_var) { - GELOGI("VarManager::GetChangedGraphId() success, node:%s, graph_id:%u.", node->GetName().c_str(), graph_id); - VarTransRoad *trans_road = VarManager::Instance(model->session_id_)->GetTransRoad(node->GetName()); - if (trans_road == nullptr) { - GELOGI("The variable %s does not have any trans road", node->GetName().c_str()); - return SUCCESS; - } - ret = TransVarData(node, *trans_road, model->session_id_, model->device_id_); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "TransVarData failed, node:%s, graph_id:%u.", node->GetName().c_str(), graph_id); - return INTERNAL_ERROR; - } - VarManager::Instance(model->session_id_)->RemoveChangedGraphId(node->GetName()); - } - return SUCCESS; - }, - node, this, ctx, graph_id)); + VarManager::Instance(model->session_id_)->RemoveChangedGraphId(node->GetName()); + } + return SUCCESS; + }, + node, this, ctx, graph_id); + if (!f.valid()) { + GELOGE(FAILED, "Future is invalid"); + return FAILED; + } + vector_future.push_back(std::move(f)); } Status ret_status; @@ -2390,8 +2410,8 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) return SUCCESS; } -void DavinciModel::InitDataDumper() { - GELOGI("data dumper init, name: %s, id: %u.", name_.c_str(), model_id_); +void DavinciModel::SetDataDumperArgs() { + GELOGI("set data dumper args, name: %s, id: %u.", name_.c_str(), model_id_); data_dumper_.SetModelName(name_); data_dumper_.SetModelId(model_id_); data_dumper_.SetMemory(runtime_param_); @@ -2403,7 +2423,26 @@ void DavinciModel::InitDataDumper() { return; } data_dumper_.SetDeviceId(device_id); - GELOGI("InitDataDumper end."); + + // set loop count addr + auto get_var_addr = [](const OpDescPtr &op, 
const RuntimeParam &runtime_param) -> void * { + if (op != nullptr) { + auto v_output_size = ModelUtils::GetOutputSize(op); + auto v_output_addr = ModelUtils::GetOutputDataAddrs(runtime_param, op); + if (v_output_size.empty() || v_output_addr.empty()) { + return nullptr; + } + return v_output_addr[0]; + } + GELOGW("op is null."); + return nullptr; + }; + + data_dumper_.SetLoopAddr(get_var_addr(GetVariableOp(NODE_NAME_GLOBAL_STEP), runtime_param_), + get_var_addr(GetVariableOp(NODE_NAME_FLOWCTRL_LOOP_PER_ITER), runtime_param_), + get_var_addr(GetVariableOp(NODE_NAME_FLOWCTRL_LOOP_COND), runtime_param_)); + + GELOGI("SetDataDumperArgs end."); } uint32_t DavinciModel::GetFlowctrlIndex(uint32_t op_index) { @@ -2425,13 +2464,13 @@ Status TransTensor(uint8_t *var_data, const NodePtr &var_src, const NodePtr &var auto src_data_datatype = var_src->GetOpDesc()->GetOutputDesc(0).GetDataType(); auto dst_data_datatype = var_dst->GetOpDesc()->GetOutputDesc(0).GetDataType(); GE_IF_BOOL_EXEC( - src_data_datatype != dst_data_datatype, - auto ret = formats::TransDataType( - {var_data, static_cast(src_data_shape_size), src_data_datatype, dst_data_datatype}, result); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "trans var data on host failed"); - return ret; - }); + src_data_datatype != dst_data_datatype, + auto ret = formats::TransDataType( + {var_data, static_cast(src_data_shape_size), src_data_datatype, dst_data_datatype}, result); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "trans var data on host failed"); + return ret; + }); return SUCCESS; } diff --git a/src/ge/graph/load/new_model_manager/davinci_model.h b/src/ge/graph/load/new_model_manager/davinci_model.h index 822b87eb..4a674517 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.h +++ b/src/ge/graph/load/new_model_manager/davinci_model.h @@ -24,28 +24,24 @@ #include #include -#include "cce/cce_def.hpp" -#include "cce/dnn.h" -#include "cce/dnn_base_def.hpp" -#include "cce/taskdown_common.hpp" #include "common/ge_types.h" -#include "common/helper/model_helper.h" -#include "common/helper/om_file_helper.h" -#include "common/opskernel/ge_task_info.h" #include "common/types.h" -#include "framework/common/util.h" -#include "graph/debug/ge_attr_define.h" -#include "graph/load/new_model_manager/data_dumper.h" #include "graph/load/new_model_manager/data_inputer.h" #include "graph/load/new_model_manager/model_utils.h" +#include "proto/task.pb.h" +#include "mmpa/mmpa_api.h" +#include "graph/debug/ge_attr_define.h" +#include "common/opskernel/ge_task_info.h" +#include "framework/common/util.h" #include "graph/model.h" -#include "graph/node.h" #include "graph/op_desc.h" #include "graph/operator.h" -#include "graph/utils/attr_utils.h" #include "graph/utils/tensor_utils.h" -#include "mmpa/mmpa_api.h" -#include "proto/task.pb.h" +#include "common/helper/model_helper.h" +#include "common/helper/om_file_helper.h" +#include "graph/load/new_model_manager/data_dumper.h" +#include "graph/node.h" +#include "graph/utils/attr_utils.h" #include "task_info/task_info.h" #define WEIGHTS_ADDR_TO_CCE(var) @@ -521,7 +517,7 @@ class DavinciModel { Status CopyVarData(ComputeGraphPtr &graph); Status CopyTensorFromSrcVarNode(const NodePtr &var_src, const NodePtr &var_dst); - void InitDataDumper(); + void SetDataDumperArgs(); bool is_model_has_inited_; uint32_t model_id_; @@ -615,6 +611,8 @@ class DavinciModel { int64_t maxDumpOpNum_; // for data dump DataDumper data_dumper_; + + uint64_t iterator_count_; }; #define TIME_LOG_HEAD_FMT " OP_ID OP_NAME OP_TYPE ELAPSED 
TIME(ms)" diff --git a/src/ge/graph/load/new_model_manager/model_manager.cc b/src/ge/graph/load/new_model_manager/model_manager.cc index b3325e14..0c4fe294 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.cc +++ b/src/ge/graph/load/new_model_manager/model_manager.cc @@ -19,7 +19,6 @@ #include #include "cce/aicpu_engine_struct.h" -#include "cce/compiler_stub.h" #include "common/l2_cache_optimize.h" #include "common/profiling/profiling_manager.h" #include "common/properties_manager.h" @@ -31,10 +30,12 @@ namespace ge { thread_local uint32_t device_count = 0; namespace { const int kCmdParSize = 2; +const int kDumpCmdPairSize = 2; } // namespace + std::shared_ptr ModelManager::GetInstance() { static const std::shared_ptr instance_ptr = - shared_ptr(new (std::nothrow) ModelManager(), ModelManager::FinalizeForPtr); + shared_ptr(new (std::nothrow) ModelManager(), ModelManager::FinalizeForPtr); return instance_ptr; } @@ -55,7 +56,7 @@ static Status KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType opType, uint64_t } rt_ret = - rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), ¶m_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); + rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), ¶m_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(rt_ret, "memory copy to device failed."); GE_CHK_RT(rtFree(devicebase)); @@ -146,7 +147,7 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, shared_ptr & GenModelId(&model_id); GE_CHK_STATUS_RET(SetDevice(static_cast(GetContext().DeviceId())), "Set device failed, model id:%u.", - model_id); + model_id); std::shared_ptr davinci_model = MakeShared(0, listener); if (davinci_model == nullptr) { @@ -160,12 +161,11 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, shared_ptr & Status ret = SUCCESS; do { GeModelPtr ge_model; - GE_IF_BOOL_EXEC(ModelHelper::TransModelToGeModel(model, ge_model) != SUCCESS, - GELOGW("trans model to ge_model failed."); - break;); + GE_IF_BOOL_EXEC( + ModelHelper::TransModelToGeModel(model, ge_model) != SUCCESS, GELOGW("trans model to ge_model failed."); break;); GE_TIMESTAMP_START(Assign); GE_IF_BOOL_EXEC(SUCCESS != (ret = davinci_model->Assign(ge_model)), GELOGW("assign model to modeldef failed."); - break;); + break;); GE_TIMESTAMP_END(Assign, "GraphLoader::ModelAssign"); GE_TIMESTAMP_START(Init); @@ -202,23 +202,6 @@ Status ModelManager::DeleteModel(uint32_t id) { return SUCCESS; } -Status ModelManager::UnLoadAllModel(int32_t DeviceId) { - vector id_list; - - for (const auto &it : model_map_) { - uint32_t model_id = it.first; - GELOGI("Unload All model : model id : %u", model_id); - id_list.push_back(model_id); - GE_CHK_STATUS_RET(Stop(model_id), "UnLoadAllModel: Stop model : %u failed.", model_id); - } - - for (const auto &id : id_list) { - GE_CHK_STATUS_RET(UnloadModeldef(id), "UnLoadAllModel: Unload model : %u failed.", id); - } - - return SUCCESS; -} - std::shared_ptr ModelManager::GetModel(uint32_t id) { std::lock_guard lock(map_mutex_); @@ -323,6 +306,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector> cmds = { - {"profile", HandleProfileCommand}, {"dump", HandleDumpCommand}, {"profiling", HandleAclProfilingCommand}}; + {"profile", HandleProfileCommand}, {"dump", HandleDumpCommand}, {"profiling", HandleAclProfilingCommand}}; auto iter = cmds.find(command.cmd_type); if (iter == cmds.end()) { @@ -442,8 +426,7 @@ Status ModelManager::HandleProfileCommand(const Command &command) { 
PropertiesManager::Instance().SetPropertyValue(iter->second, property_value); } - if ((map_key == PROFILER_JOBCTX || map_key == PROFILER_TARGET_PATH || - map_key == RTS_PROFILE_PATH)) { + if ((map_key == PROFILER_JOBCTX || map_key == PROFILER_TARGET_PATH || map_key == RTS_PROFILE_PATH)) { PropertiesManager::Instance().SetPropertyValue(map_key, value); } @@ -459,7 +442,7 @@ Status ModelManager::HandleProfileCommand(const Command &command) { } Status ModelManager::HandleDumpCommand(const Command &command) { - if (command.cmd_params.size() % kCmdParSize != 0) { + if (command.cmd_params.size() % kDumpCmdPairSize != 0) { GELOGE(PARAM_INVALID, "When the cmd_type is 'dump', the size of cmd_params must be a even number."); return PARAM_INVALID; } @@ -468,6 +451,7 @@ Status ModelManager::HandleDumpCommand(const Command &command) { std::string dump_model(DUMP_ALL_MODEL); std::string dump_path("/"); std::set dump_layers; + std::string dump_layer_count; auto iter_dump_status = std::find(command.cmd_params.begin(), command.cmd_params.end(), DUMP_STATUS); if (iter_dump_status != command.cmd_params.end()) { @@ -498,10 +482,10 @@ Status ModelManager::HandleDumpCommand(const Command &command) { return SUCCESS; } - for (size_t i = 0; i < command.cmd_params.size() / 2; ++i) { - if (command.cmd_params.at(i * kCmdParSize).find(DUMP_LAYER) != std::string::npos) { - GELOGI("dump layer: %s.", command.cmd_params.at(i * kCmdParSize + 1).c_str()); - (void)dump_layers.insert(command.cmd_params.at(i * kCmdParSize + 1)); + for (size_t i = 0; i < command.cmd_params.size() / kDumpCmdPairSize; ++i) { + if (command.cmd_params.at(i * kDumpCmdPairSize).find(DUMP_LAYER) != std::string::npos) { + GELOGI("dump layer: %s.", command.cmd_params.at(i * kDumpCmdPairSize + 1).c_str()); + dump_layers.insert(command.cmd_params.at(i * kDumpCmdPairSize + 1)); } } @@ -529,7 +513,7 @@ Status ModelManager::HandleDumpCommand(const Command &command) { Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetMaxUsedMemory Failed, Invalid Model ID %u !", - model_id); + model_id); max_size = davinci_model->TotalMemSize(); return SUCCESS; @@ -539,7 +523,7 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector &output_desc) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, - "GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); + "GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); return davinci_model->GetInputOutputDescInfo(input_desc, output_desc); } @@ -548,7 +532,7 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, vector &output_desc) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, - "GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); + "GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc); } @@ -558,7 +542,7 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector &inputFormats, std::vector &outputFormats) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, - "GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); + "GetInputOutputDescInfo Failed, Invalid Model ID %u !", 
model_id); return davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats); } @@ -569,7 +553,7 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, std::vector &outputFormats) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, - "GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); + "GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); } @@ -577,7 +561,7 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr listener, void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, PARAM_INVALID, - "input key file path is not valid!"); + "input key file path is not valid!"); GenModelId(&model_id); shared_ptr davinci_model = nullptr; @@ -634,7 +618,7 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d const std::vector &input_queue_ids, const std::vector &output_queue_ids) { GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || access(model_data.key.c_str(), F_OK) == 0, PARAM_INVALID, - "input key file path is not valid!"); + "input key file path is not valid!"); ModelHelper model_helper; Status ret = model_helper.LoadModel(model_data); diff --git a/src/ge/graph/load/new_model_manager/model_manager.h b/src/ge/graph/load/new_model_manager/model_manager.h index a392a380..08607926 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.h +++ b/src/ge/graph/load/new_model_manager/model_manager.h @@ -102,14 +102,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// /// @ingroup domi_ome - /// @brief unload all models and free resources - /// @return Status run result - /// @author - /// - ge::Status UnLoadAllModel(int32_t DeviceId); - - /// - /// @ingroup domi_ome /// @brief process input data asynchronously /// cannot be invoked by multiple thread /// if one fails, other continue @@ -179,8 +171,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { std::vector &output_desc); ge::Status GetInputOutputDescInfo(const uint32_t model_id, std::vector &input_desc, - std::vector &output_desc, - std::vector &inputFormats, std::vector &outputFormats); + std::vector &output_desc, std::vector &inputFormats, + std::vector &outputFormats); /// /// @ingroup domi_ome diff --git a/src/ge/graph/load/new_model_manager/model_utils.h b/src/ge/graph/load/new_model_manager/model_utils.h index 7d0c49c3..950fdbe8 100644 --- a/src/ge/graph/load/new_model_manager/model_utils.h +++ b/src/ge/graph/load/new_model_manager/model_utils.h @@ -19,8 +19,6 @@ #include -#include "cce/dnn.h" -#include "cce/taskdown_api.h" #include "common/ge_inner_error_codes.h" #include "common/types.h" #include "graph/load/new_model_manager/task_info/task_info.h" diff --git a/src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc old mode 100755 new mode 100644 index 80c31b09..3facd504 --- a/src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc @@ -96,7 +96,7 @@ Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_m if 
(workspace_mem_size_tmp != 0) { workspace_mem_size_ = workspace_mem_size_tmp; vector workspace_data_addrs = - ModelUtils::GetWorkspaceDataAddrs(davinci_model->GetRuntimeParam(), op_desc); + ModelUtils::GetWorkspaceDataAddrs(davinci_model->GetRuntimeParam(), op_desc); if (!workspace_data_addrs.empty()) { GELOGI("Get work_space_addr"); workspace_addr_ = workspace_data_addrs[0]; @@ -114,7 +114,7 @@ Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_m for (int64_t i = 0; i < hccl_stream_num; ++i) { rtStream_t stream = nullptr; rtError_t rt_ret = - rtStreamCreateWithFlags(&stream, davinci_model->Priority(), RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY); + rtStreamCreateWithFlags(&stream, davinci_model->Priority(), RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return RT_FAILED; @@ -213,8 +213,8 @@ void HcclTaskInfo::TransToGETaskInfo(GETaskInfo &ge_task) { ge_task.kernelHcclInfo.outputDataAddr = output_data_addr_; ge_task.kernelHcclInfo.workSpaceAddr = workspace_addr_; ge_task.kernelHcclInfo.count = count_; - ge_task.kernelHcclInfo.dataType = data_type_; - ge_task.kernelHcclInfo.opType = op_type_; + ge_task.kernelHcclInfo.dataType = static_cast(data_type_); + ge_task.kernelHcclInfo.opType = static_cast(op_type_); ge_task.kernelHcclInfo.rootId = root_id_; ge_task.kernelHcclInfo.workSpaceMemSize = workspace_mem_size_; ge_task.kernelHcclInfo.hcclStreamList = hccl_stream_list_; @@ -240,8 +240,8 @@ void HcclTaskInfo::GetPrivateDefByTaskDef(const domi::TaskDef &task) { return; } - ret = rtMemcpy(private_def_, private_def_len_, task.private_def().c_str(), private_def_len_, - RT_MEMCPY_HOST_TO_HOST); + ret = + rtMemcpy(private_def_, private_def_len_, task.private_def().c_str(), private_def_len_, RT_MEMCPY_HOST_TO_HOST); if (ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rtMemcpy Fail, ret = 0x%X.", ret); return; diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc index 765ae1b3..32a9da8b 100644 --- a/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc @@ -19,17 +19,15 @@ #include #include "cce/aicpu_engine_struct.h" -#include "cce/fwk_adpt_struct.h" -#include "common/ge/ge_util.h" -#include "common/properties_manager.h" #include "framework/common/debug/ge_log.h" #include "framework/common/fmk_error_codes.h" +#include "common/ge/ge_util.h" +#include "common/properties_manager.h" #include "graph/attr_value.h" #include "graph/load/new_model_manager/davinci_model.h" #include "graph/load/new_model_manager/model_manager.h" namespace ge { -static const char *const GE_GLOBAL_STEP = "Variable"; Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { GELOGI("KernelExTaskInfo Init Start."); @@ -59,8 +57,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin return FAILED; } else { rtError_t rt_ret = - rtMemcpy(workspace_data_addrs[0], kernel_ex_def.task_info_size(), kernel_ex_def.task_info().data(), - kernel_ex_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE); + rtMemcpy(workspace_data_addrs[0], kernel_ex_def.task_info_size(), kernel_ex_def.task_info().data(), + kernel_ex_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(FAILED, "rtMemcpy error: 0x%X", rt_ret); return FAILED); } @@ -72,19 +70,19 
@@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin return FAILED; } errno_t sec_ret = - memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args().data(), kernel_ex_def.args_size()); + memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args().data(), kernel_ex_def.args_size()); if (sec_ret != EOK) { GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); return FAILED; } // 2.1 get loop cond variable for tensor array write - uint64_t loop_cond_addr = 0; - OpDescPtr loop_cond_node = davinci_model->GetVariableOp(GE_GLOBAL_STEP); - if (loop_cond_node != nullptr) { - vector v_loop_cond_addr = ModelUtils::GetOutputDataAddrs(davinci_model->GetRuntimeParam(), loop_cond_node); - if (v_loop_cond_addr.size() != 0) { - loop_cond_addr = static_cast(reinterpret_cast(v_loop_cond_addr[0])); + uint64_t step_id_addr = 0; + OpDescPtr step_id_node = davinci_model->GetVariableOp(NODE_NAME_GLOBAL_STEP); + if (step_id_node != nullptr) { + vector v_step_id_addr = ModelUtils::GetOutputDataAddrs(davinci_model->GetRuntimeParam(), step_id_node); + if (!v_step_id_addr.empty()) { + step_id_addr = static_cast(reinterpret_cast(v_step_id_addr[0])); } } @@ -107,15 +105,15 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model->Name(), op_desc->GetName())) { dump_flag_ = RT_KERNEL_DUMPFLAG; - dump_args_ = reinterpret_cast(reinterpret_cast(input_output_addr_) + - sizeof(void *) * input_addrs.size()); + dump_args_ = + reinterpret_cast(reinterpret_cast(input_output_addr_) + sizeof(void *) * input_addrs.size()); } } uint64_t input_output_addr = static_cast(reinterpret_cast(input_output_addr_)); fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = workspace_base_addr; fwk_op_kernel.fwkKernelBase.fwk_kernel.inputOutputAddr = input_output_addr; - fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = loop_cond_addr; + fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = step_id_addr; // 4. Create session auto session_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID; diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 1115969a..5b3877c8 100644 --- a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -20,6 +20,7 @@ #include #include #include "aicpu/common/aicpu_task_struct.h" +#include "common/ge/plugin_manager.h" #include "common/properties_manager.h" #include "framework/common/debug/ge_log.h" #include "framework/common/l2_cache_optimize.h" @@ -27,10 +28,12 @@ #include "graph/load/new_model_manager/model_utils.h" #include "runtime/kernel.h" -namespace ge { -static constexpr uint8_t kL2LoadToDdr = 1; -static constexpr uint8_t kL2NotLoadToDdr = 0; +namespace { +const uint8_t kL2LoadToDdr = 1; +const uint8_t kL2NotLoadToDdr = 0; +} // namespace +namespace ge { Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { GELOGD("KernelTaskInfo Init Start."); if (davinci_model == nullptr) { @@ -64,17 +67,15 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci if (kernel_type_ == cce::ccKernelType::CCE_AI_CORE) { rtError_t rt_ret; rt_ret = rtGetFunctionByName(const_cast(kernel_def.stub_func().c_str()), &stub_func_); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - GELOGE(RT_FAILED, - "execute rtGetFunctionByName failed. 
stub_func: %s", - kernel_def.stub_func().c_str()); - return RT_FAILED;); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s", + kernel_def.stub_func().c_str()); + return RT_FAILED;); } else if (kernel_type_ != cce::ccKernelType::AI_CPU) { rtError_t rt_ret; rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key); - return RT_FAILED;); + return RT_FAILED;); } if (context.origin_op_index_size() > CC_FUSION_OP_MAX) { @@ -117,10 +118,9 @@ Status KernelTaskInfo::Distribute() { if (kernel_type_ == cce::ccKernelType::AI_CPU) { // blockDim is reserved parameter, set to 1 - rt_ret = - rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name_.c_str()), - reinterpret_cast(kernel_name_.c_str()), - 1, args_, args_size_, nullptr, stream_, dump_flag_); + rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name_.c_str()), + reinterpret_cast(kernel_name_.c_str()), 1, args_, args_size_, + nullptr, stream_, dump_flag_); } else { rt_ret = rtKernelLaunchWithFlag(stub_func_, block_dim_, args_, args_size_, static_cast(sm_desc_), stream_, dump_flag_); @@ -194,7 +194,7 @@ Status KernelTaskInfo::InitTVMTask(DavinciModel *davinci_model, uint16_t offset, const vector input_data_addrs = ModelUtils::GetInputDataAddrs(davinci_model->GetRuntimeParam(), op_desc); const vector output_data_addrs = ModelUtils::GetOutputDataAddrs(davinci_model->GetRuntimeParam(), op_desc); const vector workspace_data_addrs = - ModelUtils::GetWorkspaceDataAddrs(davinci_model->GetRuntimeParam(), op_desc); + ModelUtils::GetWorkspaceDataAddrs(davinci_model->GetRuntimeParam(), op_desc); vector tensor_device_addrs; tensor_device_addrs.insert(tensor_device_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); @@ -232,8 +232,8 @@ Status KernelTaskInfo::InitTVMTask(DavinciModel *davinci_model, uint16_t offset, if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model->Name(), op_desc->GetName())) { dump_flag_ = RT_KERNEL_DUMPFLAG; - dump_args_ = reinterpret_cast(reinterpret_cast(args_) + offset + - sizeof(void *) * input_data_addrs.size()); + dump_args_ = + reinterpret_cast(reinterpret_cast(args_) + offset + sizeof(void *) * input_data_addrs.size()); } davinci_model_->SetZeroCopyAddr(tensor_device_addrs, static_cast(args_) + offset); @@ -302,9 +302,9 @@ Status KernelTaskInfo::InitAICPUCustomTask(const std::map input_data_addrs = - ModelUtils::GetInputDataAddrs(davinci_model_->GetRuntimeParam(), op_desc); + ModelUtils::GetInputDataAddrs(davinci_model_->GetRuntimeParam(), op_desc); const std::vector output_data_addrs = - ModelUtils::GetOutputDataAddrs(davinci_model_->GetRuntimeParam(), op_desc); + ModelUtils::GetOutputDataAddrs(davinci_model_->GetRuntimeParam(), op_desc); Status ret = StoreInputOutputTensor(input_data_addrs, output_data_addrs, ModelUtils::GetInputDescs(op_desc), ModelUtils::GetOutputDescs(op_desc)); @@ -349,15 +349,15 @@ Status KernelTaskInfo::InitAICPUCustomTask(const std::map(args + ctx_.argsOffset[0])) = - reinterpret_cast(custom_info_.input_descs); // arg 0 + reinterpret_cast(custom_info_.input_descs); // arg 0 *(reinterpret_cast(args + ctx_.argsOffset[1])) = - reinterpret_cast(custom_info_.input_addrs); // arg 1 + reinterpret_cast(custom_info_.input_addrs); // arg 1 *(reinterpret_cast(args + ctx_.argsOffset[2])) = - reinterpret_cast(custom_info_.output_descs); // arg 2 + reinterpret_cast(custom_info_.output_descs); // arg 2 
*(reinterpret_cast(args + ctx_.argsOffset[3])) = - reinterpret_cast(custom_info_.output_addrs); // arg 3 + reinterpret_cast(custom_info_.output_addrs); // arg 3 *(reinterpret_cast(args + ctx_.argsOffset[4])) = - reinterpret_cast(custom_info_.attr_handle); // arg 4 + reinterpret_cast(custom_info_.attr_handle); // arg 4 rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { @@ -365,8 +365,8 @@ Status KernelTaskInfo::InitAICPUCustomTask(const std::map &op_lis if (!io_addrs.empty()) { // refresh io addrs uintptr_t io_addr = - reinterpret_cast(args_addr.get()) + static_cast(sizeof(aicpu::AicpuParamHead)); + reinterpret_cast(args_addr.get()) + static_cast(sizeof(aicpu::AicpuParamHead)); auto addrs_size = sizeof(uint64_t) * (io_addrs.size()); sec_ret = memcpy_s(reinterpret_cast(io_addr), addrs_size, static_cast(io_addrs.data()), addrs_size); if (sec_ret != EOK) { @@ -512,7 +512,7 @@ Status KernelTaskInfo::InitAicpuTask(const std::map &op_lis if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), op_desc->GetName())) { dump_flag_ = RT_KERNEL_DUMPFLAG; dump_args_ = reinterpret_cast(reinterpret_cast(args_) + sizeof(aicpu::AicpuParamHead) + - sizeof(void *) * input_addrs.size()); + sizeof(void *) * input_addrs.size()); } davinci_model_->SetZeroCopyAddr(io_addrs, static_cast(args_) + sizeof(aicpu::AicpuParamHead)); @@ -640,32 +640,80 @@ Status KernelTaskInfo::UpdateCceArgs(std::string &sm_desc, std::string &flowtabl const domi::KernelDef &kernel_def) { GE_CHECK_NOTNULL(davinci_model); const domi::KernelContext &context = kernel_def.context(); + + uint64_t data_base_addr = + reinterpret_cast(reinterpret_cast(davinci_model->MemBase())) - davinci_model->GetRtBaseAddr(); + uint64_t weight_base_addr = reinterpret_cast(reinterpret_cast(davinci_model->WeightsMemBase())) - + davinci_model->GetRtWeightAddr(); + uint64_t var_base_addr = reinterpret_cast(reinterpret_cast(davinci_model->VarMemBase())) - + davinci_model->GetRtVarAddr(); + + Status status = + CceUpdateKernelArgs(context, data_base_addr, weight_base_addr, var_base_addr, sm_desc, flowtable, kernel_def); + if (status != SUCCESS) { + GELOGE(FAILED, "Call cce api failed"); + return FAILED; + } + return SUCCESS; +} + +Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, uint64_t &data_base_addr, + uint64_t &weight_base_addr, uint64_t &var_base_addr, std::string &sm_desc, + std::string &flowtable, const domi::KernelDef &kernel_def) { char *sm_contrl = nullptr; if (!sm_desc.empty()) { sm_contrl = const_cast(sm_desc.data()); } - uint64_t data_base_addr = reinterpret_cast(reinterpret_cast(davinci_model->MemBase())) - - davinci_model->GetRtBaseAddr(); - uint64_t weight_base_addr = reinterpret_cast(reinterpret_cast(davinci_model->WeightsMemBase())) - - davinci_model->GetRtWeightAddr(); - uint64_t var_base_addr = reinterpret_cast(reinterpret_cast(davinci_model->VarMemBase())) - - davinci_model->GetRtVarAddr(); + std::string file_name = "libcce.so"; + std::string path = PluginManager::GetPath(); + path.append(file_name); + char canonicalPath[PATH_MAX] = {0}; + if (path.length() >= PATH_MAX) { + GELOGW("File path is too long."); + return FAILED; + } + if (realpath(path.c_str(), canonicalPath) == nullptr) { + GELOGW("failed to get realpath of %s", path.c_str()); + return FAILED; + } + + GELOGI("FileName:%s, Path:%s.", file_name.c_str(), canonicalPath); + auto handle = dlopen(canonicalPath, RTLD_NOW | RTLD_GLOBAL); + if (handle == nullptr) { + GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen 
%s! ", dlerror()); + return FAILED; + } cce::ccStatus_t cc_ret; - if (context.is_flowtable()) { - cc_ret = ccUpdateKernelArgs(ctx_, data_base_addr, weight_base_addr, var_base_addr, - const_cast(flowtable.data()), kernel_def.flowtable().size(), sm_contrl); + auto cceUpdateKernelArgs = (cce::ccStatus_t(*)(cce::ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t, + void *))dlsym(handle, "ccUpdateKernelArgs"); + if (cceUpdateKernelArgs == nullptr) { + GELOGE(FAILED, "Failed to invoke function ccUpdateKernelArgs"); + if (dlclose(handle) != 0) { + GELOGW("Failed to close handle %s", dlerror()); + } + return FAILED; } else { - cc_ret = ccUpdateKernelArgs(ctx_, data_base_addr, weight_base_addr, var_base_addr, - const_cast(kernel_def.args().data()), args_size_, sm_contrl); + GELOGI("Libcce.so has been opened"); + if (context.is_flowtable()) { + cc_ret = cceUpdateKernelArgs(ctx_, data_base_addr, weight_base_addr, var_base_addr, + const_cast(flowtable.data()), kernel_def.flowtable().size(), sm_contrl); + } else { + cc_ret = cceUpdateKernelArgs(ctx_, data_base_addr, weight_base_addr, var_base_addr, + const_cast(kernel_def.args().data()), args_size_, sm_contrl); + } + } + if (dlclose(handle) != 0) { + GELOGW("Failed to close handle %s", dlerror()); + return FAILED; } - if (cc_ret != cce::CC_STATUS_SUCCESS) { GELOGE(CCE_FAILED, "Call cce api failed, ret: 0x%X", cc_ret); return CCE_FAILED; } + GELOGI("CceUpdateKernelArgs success!"); return SUCCESS; } @@ -696,8 +744,8 @@ Status KernelTaskInfo::SetFlowtable(std::string &flowtable, const domi::KernelDe } *(reinterpret_cast( - args + (reinterpret_cast(const_cast(context.args_offset().data())))[0])) = - reinterpret_cast(flowtable_); + args + (reinterpret_cast(const_cast(context.args_offset().data())))[0])) = + reinterpret_cast(flowtable_); } return SUCCESS; } diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h index 03827bec..c1291e1a 100644 --- a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h +++ b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h @@ -65,7 +65,7 @@ class KernelTaskInfo : public TaskInfo { uint32_t GetTaskID() override { return task_id_; } uintptr_t GetDumpArgs() override { - auto ret = reinterpret_cast(dump_args_); + auto ret = reinterpret_cast(dump_args_); return ret; } @@ -91,6 +91,9 @@ class KernelTaskInfo : public TaskInfo { Status UpdateCceArgs(std::string &sm_desc, std::string &flowtable, DavinciModel *davinci_model, const domi::KernelDef &kernel_def); + Status CceUpdateKernelArgs(const domi::KernelContext &context, uint64_t &data_base_addr, uint64_t &weight_base_addr, + uint64_t &var_base_addr, std::string &sm_desc, std::string &flowtable, + const domi::KernelDef &kernel_def); Status SetFlowtable(std::string &flowtable, const domi::KernelDef &kernel_def); diff --git a/src/ge/graph/load/output/output.h b/src/ge/graph/load/output/output.h index cff8505e..cd74a59d 100644 --- a/src/ge/graph/load/output/output.h +++ b/src/ge/graph/load/output/output.h @@ -20,7 +20,6 @@ #include #include -#include "cce/dnn_base_def.hpp" #include "common/debug/log.h" #include "common/op/attr_value_util.h" #include "common/op/ge_op_utils.h" @@ -55,8 +54,7 @@ class Output { /// @brief when model running, Add one DataOp as input node, Add one Output Op as output node. 
/// @return Status /// - virtual Status CopyResult(OutputData &rslt, uint32_t data_begin, uint32_t &data_index, - bool support_mem_share); + virtual Status CopyResult(OutputData &rslt, uint32_t data_begin, uint32_t &data_index, bool support_mem_share); /// /// @ingroup domi diff --git a/src/ge/graph/manager/graph_manager.cc b/src/ge/graph/manager/graph_manager.cc index 9ac45dcc..ff238fb3 100644 --- a/src/ge/graph/manager/graph_manager.cc +++ b/src/ge/graph/manager/graph_manager.cc @@ -194,7 +194,8 @@ Status GraphManager::Finalize() { return unload_model_ret; } -Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph) { +Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, + const std::map &options) { if (graph_map_.find(graph_id) != graph_map_.end()) { GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] graph exists, graph_id = %u.", graph_id); return GE_GRAPH_GRAPH_ALREADY_EXIST; @@ -228,6 +229,7 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph) { } graph_node->SetGraph(graph_ptr); + graph_node->SetOptions(options); graph_map_.insert(std::make_pair(graph_id, graph_node)); @@ -307,8 +309,13 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector> vector_future(sub_graph_list_size); for (size_t i = 0; i < sub_graph_list_size; ++i) { - vector_future[i] = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, sub_graph_list[i], - session_id, GetThreadLocalContext()); + std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, sub_graph_list[i], + session_id, GetThreadLocalContext()); + if (!f.valid()) { + GELOGE(FAILED, "Future is invalid"); + return FAILED; + } + vector_future[i] = std::move(f); } for (size_t i = 0; i < vector_future.size(); ++i) { Status ret_status = vector_future[i].get(); @@ -363,7 +370,8 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vectorGetGraph())); GE_IF_BOOL_EXEC( - GetTrainFlag(), GE_IF_BOOL_EXEC(compute_graph_tmp == nullptr, - GELOGE(GE_GRAPH_GRAPH_NODE_NULL, - "[RunGraph] compute_graph_tmp is NULL, graph id = %u.", graph_id); - return GE_GRAPH_GRAPH_NODE_NULL;) - // adapt for not set. - GE_IF_BOOL_EXEC(!compute_graph_tmp->GetNeedIteration(), - compute_graph_tmp->SetNeedIteration(GraphUtils::CheckIsTrainGraph(compute_graph_tmp));)) + GetTrainFlag(), + GE_IF_BOOL_EXEC(compute_graph_tmp == nullptr, + GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[RunGraph] compute_graph_tmp is NULL, graph id = %u.", graph_id); + return GE_GRAPH_GRAPH_NODE_NULL;) + // adapt for not set. 
+ GE_IF_BOOL_EXEC(!compute_graph_tmp->GetNeedIteration(), + compute_graph_tmp->SetNeedIteration(GraphUtils::CheckIsTrainGraph(compute_graph_tmp));)) std::vector ge_models; @@ -802,7 +810,11 @@ Status GraphManager::ParseOptions(const std::map &opti GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.localFmkopFlag value is invalid, must be 0 or 1."); return GE_GRAPH_OPTIONS_INVALID; } - + options_.enable_print_op_pass = true; + ret = ParseOption(options, ENABLE_PRINT_OP_PASS, options_.enable_print_op_pass); + GE_IF_BOOL_EXEC(ret != SUCCESS, + GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.enablePrintOpPass value is invalid, must be 0 or 1."); + return GE_GRAPH_OPTIONS_INVALID); // parse hcom parallel options_.hcom_parallel = false; ret = ParseOption(options, HCOM_PARALLEL, options_.hcom_parallel); @@ -1030,7 +1042,7 @@ Status GraphManager::SummaryHandle(const GraphId &graph_id, std::vector summary_output_index; GELOGI("[GraphManager] SummaryHandle, outputsSize=%zu.", outputs.size()); const std::map> &whole_summary_output_indexes = - graph_optimize_.GetSummaryOutputIndexes(); + graph_optimize_.GetSummaryOutputIndexes(); if (whole_summary_output_indexes.find(graph_id) == whole_summary_output_indexes.end()) { GELOGE(FAILED, "No Summary graph found in map."); return FAILED; @@ -1098,8 +1110,7 @@ Status GraphManager::CheckpointHandle(const GraphId &graph_id, const std::vector } Status GraphManager::RegisterCallBackFunc( - const std::string &key, - const std::function &)> &callback) { + const std::string &key, const std::function &)> &callback) { GELOGI("[GraphManager] RegisterCallBackFunc, key=%s.", key.c_str()); me_callback_map_[key] = callback; return SUCCESS; @@ -1335,10 +1346,9 @@ Status GraphManager::OptimizeAfterMergeSubGraph(ge::ComputeGraphPtr &compute_gra GE_CHK_STATUS_RET(after_merge_passes.AddPass(new (std::nothrow) VariableRefDeleteOpPass)) GE_CHK_STATUS_RET(after_merge_passes.AddPass(new (std::nothrow) SameTransdataBreadthFusionPass)) GE_CHK_STATUS_RET(after_merge_passes.AddPass(new (std::nothrow) TransOpWithoutReshapeFusionPass)) - GE_CHK_STATUS_RET(after_merge_passes.AddPass(new (std::nothrow) CompileNodesPass)) GE_CHK_STATUS_RET(after_merge_passes.AddPass(new (std::nothrow) AtomicAddrCleanPass)) GE_CHK_STATUS_RET( - after_merge_passes.AddPass(new (std::nothrow) LinkGenMaskNodesPass(options_.stream_max_parallel_num))) + after_merge_passes.AddPass(new (std::nothrow) LinkGenMaskNodesPass(options_.stream_max_parallel_num))) GE_TIMESTAMP_START(after_merge_passes); ret = after_merge_passes.Run(compute_graph); @@ -1445,9 +1455,9 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra } GELOGI( - "CheckAndReleaseMemory Graph[%u] need memory_size[%ld], weight_size[%ld]," - " Device[%u] free_memory_size[%ld]", - graph_node->GetGraphId(), memory_size, weight_size, GetContext().DeviceId(), free_memory); + "CheckAndReleaseMemory Graph[%u] need memory_size[%ld], weight_size[%ld]," + " Device[%u] free_memory_size[%ld]", + graph_node->GetGraphId(), memory_size, weight_size, GetContext().DeviceId(), free_memory); if (CheckInt64AddOverflow(memory_size, weight_size) != SUCCESS) { GELOGE(INTERNAL_ERROR, "The sum of Memory size and weight size exceeds INT64_MAX"); return INTERNAL_ERROR; @@ -1502,7 +1512,7 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager if (sub_graph_info_ptr != nullptr && graph_manager != nullptr) { ComputeGraphPtr compute_graph_tmp = sub_graph_info_ptr->GetSubGraph(); const std::string &engine_name = sub_graph_info_ptr->GetEngineName(); 
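For reference, the ge.enablePrintOpPass handling added above follows the same convention as the other boolean graph options parsed in ParseOptions: an absent key keeps the default, and only the strings "0" and "1" are accepted. A small sketch of that convention; ParseBoolOption is a hypothetical free function, while the real ParseOption is a GraphManager member:

#include <map>
#include <string>

// Returns false when the value is present but not "0"/"1"; the caller maps
// that to GE_GRAPH_OPTIONS_INVALID.
bool ParseBoolOption(const std::map<std::string, std::string> &options,
                     const std::string &key, bool default_value, bool &out) {
  out = default_value;
  auto it = options.find(key);
  if (it == options.end()) {
    return true;  // option not set: keep the default
  }
  if (it->second == "0") {
    out = false;
    return true;
  }
  if (it->second == "1") {
    out = true;
    return true;
  }
  return false;  // invalid value
}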
- GELOGI("ProcessSubGraphWithMultiThreads start, graph name is %s, engine_name is %s, thread id is %lu", + GELOGD("ProcessSubGraphWithMultiThreads start, graph name is %s, engine_name is %s, thread id is %lu", compute_graph_tmp != nullptr ? compute_graph_tmp->GetName().c_str() : "", engine_name.c_str(), pthread_self()); GraphUtils::DumpGEGraph(compute_graph_tmp, "OptimizeSubGraphBefore"); @@ -1513,13 +1523,11 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager if (ret != SUCCESS) { GELOGE(ret, "SubGraph optimize Failed %s", engine_name.c_str()); return ret; - } else { - GELOGI("SubGraph optimize success %s", engine_name.c_str()); } GraphUtils::DumpGEGraph(compute_graph_tmp, "OptimizeSubGraphAfter"); GraphUtils::DumpGEGraphToOnnx(*compute_graph_tmp, "OptimizeSubGraphAfter"); sub_graph_info_ptr->SetSubGraph(compute_graph_tmp); - GELOGI("ProcessSubGraphWithMultiThreads end, graph name is %s, engine_name is %s, thread id is %lu", + GELOGD("ProcessSubGraphWithMultiThreads end, graph name is %s, engine_name is %s, thread id is %lu", compute_graph_tmp != nullptr ? compute_graph_tmp->GetName().c_str() : "", engine_name.c_str(), pthread_self()); } else { @@ -1537,7 +1545,7 @@ Status GraphManager::RunGraphAsync(const GraphId &graph_id, const std::vectorGetBuildFlag()) { ReturnError(graph_manager, args.callback, PARAM_INVALID, "The graph " + std::to_string(graph_node->GetGraphId()) + - " need to re-build, you should remove it" - " from GE first, then AddGraph again and rebuild it."); + " need to re-build, you should remove it" + " from GE first, then AddGraph again and rebuild it."); + graph_node->Unlock(); return; } ret = graph_manager->PreRun(graph_node, ge_inputs, ge_models, ge_model, args.session_id); if (ret != SUCCESS) { graph_node->SetRunFlag(false); - ReturnError(graph_manager, args.callback, ret, "PreRun Failed."); + ReturnError(graph_manager, args.callback, ret, "PreRun failed, thread exit."); graph_node->Unlock(); return; } @@ -1736,4 +1745,18 @@ bool GraphManager::IsGraphNeedRebuild(uint32_t graph_id) { bool GraphManager::IsGraphNeedBuild(const GraphNodePtr &graph_node) { return !graph_node->GetBuildFlag() || var_acc_ctrl_.IsGraphNeedRebuild(graph_node->GetGraphId()); } +const map *GraphManager::GetGraphOptions(uint32_t graph_id) { + GraphNodePtr graph_node = nullptr; + Status ret = GetGraphNode(graph_id, graph_node); + if (ret != SUCCESS) { + GELOGE(ret, "[RunGraph] graph not exist, graph_id=%u.", graph_id); + return nullptr; + } + + if (!graph_node) { + GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[RunGraph] graph node is NULL, graph_id=%u.", graph_id); + return nullptr; + } + return &(graph_node->GetOptions()); +} } // namespace ge diff --git a/src/ge/graph/manager/graph_manager.h b/src/ge/graph/manager/graph_manager.h index cd07a679..c76e2e7c 100644 --- a/src/ge/graph/manager/graph_manager.h +++ b/src/ge/graph/manager/graph_manager.h @@ -69,7 +69,7 @@ class GraphManager { /// @param [out] Graph output graph /// @return Status result of function /// - Status AddGraph(const GraphId &graph_id, const Graph &graph); + Status AddGraph(const GraphId &graph_id, const Graph &graph, const std::map &options); /// /// @ingroup ge_graph @@ -143,13 +143,14 @@ class GraphManager { /// @return Status result of function /// Status RegisterCallBackFunc( - const std::string &key, - const std::function &)> &callback); + const std::string &key, const std::function &)> &callback); const bool GetTrainFlag() const { return options_.train_graph_flag; } bool IsGraphNeedRebuild(uint32_t 
graph_id); + const std::map *GetGraphOptions(uint32_t graph_id); + private: struct PreRunArgs { GraphId graph_id; diff --git a/src/ge/graph/manager/graph_manager_utils.cc b/src/ge/graph/manager/graph_manager_utils.cc index bd08a554..3666b5c5 100644 --- a/src/ge/graph/manager/graph_manager_utils.cc +++ b/src/ge/graph/manager/graph_manager_utils.cc @@ -30,6 +30,8 @@ #include "runtime/mem.h" namespace ge { +using OpDescPtr = std::shared_ptr; + GraphNode::GraphNode(GraphId graph_id) : graph_id_(graph_id), run_flag_(false), @@ -48,9 +50,7 @@ GraphNode::GraphNode(GraphId graph_id) GraphNode::~GraphNode() = default; -void GraphNode::Lock() { - sem_.Push(0); -} +void GraphNode::Lock() { sem_.Push(0); } void GraphNode::Unlock() { uint8_t unused; @@ -111,9 +111,9 @@ Status GraphModelListener::SetCondition(std::mutex *mutex, std::condition_variab Status GraphModelListener::OnComputeDone(uint32_t model_id, uint32_t task_id, uint32_t result) { GELOGI( - "[GraphManager] graph compute call back, model_id:%u, task_id:%u, " - "resultCode:%u.", - model_id, task_id, result); + "[GraphManager] graph compute call back, model_id:%u, task_id:%u, " + "resultCode:%u.", + model_id, task_id, result); GE_IF_BOOL_EXEC(condition_ == nullptr, GELOGE(FAILED, "[GraphModelListener] condition is null."); return FAILED); std::lock_guard lock(*mutex_); result_code_ = result; @@ -150,8 +150,7 @@ void RunAsyncListener::SetCallback(const std::function &callback) } Status RunAsyncListener::OnComputeDone(uint32_t model_id, uint32_t task_id, uint32_t result) { - GELOGI("[GraphManager] run graph async call back, modelId:%u, taskId:%u, resultCode:%u.", - model_id, task_id, result); + GELOGI("[GraphManager] run graph async call back, modelId:%u, taskId:%u, resultCode:%u.", model_id, task_id, result); GE_CHECK_NOTNULL(callback_); callback_(result); uint8_t unused; @@ -177,22 +176,6 @@ bool HasCalcOp(const ComputeGraphPtr &graph) { return false; } -Status CheckTinyCalc(const char *cal_conf, const ComputeGraphPtr &graph) { - if ((Params::Instance() != nullptr) && (Params::Instance()->GetTarget() != TARGET_TYPE_TINY)) { - return SUCCESS; - } - - if (cal_conf != nullptr && *cal_conf != '\0') { - return SUCCESS; - } - - if (HasCalcOp(graph)) { - return GE_GRAPH_PARAM_NULLPTR; - } - - return SUCCESS; -} - Status ParseOutNodes(const string &out_nodes) { try { if (!out_nodes.empty()) { diff --git a/src/ge/graph/manager/graph_manager_utils.h b/src/ge/graph/manager/graph_manager_utils.h index 725df2e0..7c6e9be1 100644 --- a/src/ge/graph/manager/graph_manager_utils.h +++ b/src/ge/graph/manager/graph_manager_utils.h @@ -157,6 +157,8 @@ class GraphNode { void SetLoadFlag(bool load_flag) { load_flag_ = load_flag; } void SetGeModel(const GeModelPtr &ge_model) { ge_model_ = ge_model; } GeModelPtr GetGeModel() const { return ge_model_; } + const std::map &GetOptions() const { return options_; } + void SetOptions(const std::map &options) { options_ = options; } void Lock(); void Unlock(); @@ -165,6 +167,7 @@ class GraphNode { private: GraphId graph_id_; + std::map options_; bool run_flag_; std::vector subgraph_ptr_list_; @@ -207,8 +210,6 @@ class GraphModelListener : public ge::ModelListener { std::condition_variable *condition_; }; -Status CheckTinyCalc(const char *cal_conf, const ComputeGraphPtr &graph); - Status ParseOutNodes(const string &out_nodes); struct GraphManagerOptions { @@ -231,6 +232,7 @@ struct GraphManagerOptions { bool train_graph_flag; bool local_fmk_op_flag; bool hcom_parallel; + bool enable_print_op_pass; std::map 
stream_max_parallel_num; std::string output_datatype; std::string original_model_file; @@ -254,6 +256,7 @@ struct GraphManagerOptions { train_graph_flag(false), local_fmk_op_flag(false), hcom_parallel(false), + enable_print_op_pass(true), save_original_model(false) {} }; } // namespace ge diff --git a/src/ge/graph/manager/graph_var_manager.cc b/src/ge/graph/manager/graph_var_manager.cc index 217bbb2c..c0117bdf 100644 --- a/src/ge/graph/manager/graph_var_manager.cc +++ b/src/ge/graph/manager/graph_var_manager.cc @@ -29,9 +29,9 @@ #include "graph/utils/attr_utils.h" #include "graph/utils/type_utils.h" +using std::map; using std::string; using std::vector; -using std::map; namespace ge { VarResource::VarResource(uint64_t session_id) : session_id_(session_id) {} @@ -67,7 +67,6 @@ ge::Status VarResource::GetVarAddr(const std::string &var_name, const ge::GeTens void VarResource::SetVarAddr(const std::string &var_name, const ge::GeTensorDesc &tensor_desc, uint8_t *dev_ptr, rtMemType_t memory_type) { std::string var_key = VarKey(var_name, tensor_desc); - GELOGI("VarResource::SetVarAddr , var_key = %s, mem_type:%u", var_key.c_str(), memory_type); if (var_addr_mgr_map_.count(var_key) == 0) { GELOGI("SetVarAddr node_name %s, tensor_desc type %s, format %s", var_name.c_str(), TypeUtils::DataTypeToSerialString(tensor_desc.GetDataType()).c_str(), @@ -89,9 +88,6 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen if (var_addr_mgr_map_.count(var_key) == 0) { uint64_t logic_address = VarManager::Instance(0)->GetVarMemLogicBase() + reinterpret_cast(reinterpret_cast(address)); - GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(), - TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(), - TypeUtils::DataTypeToSerialString(tensor_desc.GetDataType()).c_str()); VarAddrMgr var_addr_mgr; var_addr_mgr.address = reinterpret_cast(reinterpret_cast(logic_address)); var_addr_mgr.offset = reinterpret_cast(reinterpret_cast(address)); @@ -117,8 +113,8 @@ bool VarResource::IsVarExist(const std::string &var_name) { return cur_var_tenso std::string VarResource::VarKey(const std::string &var_name, const ge::GeTensorDesc &tensor_desc) { std::string var_key(var_name); var_key.append(std::to_string(static_cast(tensor_desc.GetFormat()))) - .append("_") - .append(std::to_string(static_cast(tensor_desc.GetDataType()))); + .append("_") + .append(std::to_string(static_cast(tensor_desc.GetDataType()))); return var_key; } @@ -257,7 +253,7 @@ Status VarResource::SetAllocatedGraphId(const std::string &var_name, uint32_t gr return SUCCESS; } -MemResource::MemResource() : total_size_(0), var_mem_base_(nullptr), var_mem_size_(0) {} +MemResource::MemResource() : total_size_(0), var_mem_size_(0) {} Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset) { size = (size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize; @@ -407,9 +403,6 @@ int64_t VarManager::GetVarMemSize(rtMemType_t memory_type) { ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTensorDesc &tensor_desc, rtMemType_t memory_type) { std::lock_guard lock(mutex_); - GELOGI("VarManager::AssignVarMem var_name = %s, data_type = %s, data_format = %s.", var_name.c_str(), - ge::TypeUtils::DataTypeToSerialString(tensor_desc.GetDataType()).c_str(), - ge::TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str()); uint32_t tensor_desc_size = 0; size_t mem_offset = 0; @@ -448,14 +441,14 @@ 
ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen } result = var_resource_->SaveVarAddr( - var_name, tensor_desc, reinterpret_cast(reinterpret_cast(mem_offset)), memory_type); + var_name, tensor_desc, reinterpret_cast(reinterpret_cast(mem_offset)), memory_type); if (result != SUCCESS) { GELOGE(ge::INTERNAL_ERROR, "AssignVarMem by offset failed."); return ge::INTERNAL_ERROR; } result = var_resource_->GetVarAddr( - var_name, tensor_desc, reinterpret_cast(reinterpret_cast(&mem_offset)), memory_type); + var_name, tensor_desc, reinterpret_cast(reinterpret_cast(&mem_offset)), memory_type); if (result != SUCCESS) { GELOGE(ge::INTERNAL_ERROR, "GetVarAddr by offset failed."); return ge::INTERNAL_ERROR; @@ -532,12 +525,12 @@ ge::Status VarManager::GetCurVarDesc(const std::string &var_name, ge::GeTensorDe ge::Status VarManager::SaveBroadCastInfo(uint32_t graph_id, const VarBroadCastInfo &broad_cast_info) { std::lock_guard lock(mutex_); GELOGI( - "VarManager::SaveBroadCastInfo var_name = %s, broadcast name = %s, " - "idx = %d, input_offset = %ld, input_size = %lu, output_offset = %ld, " - "output_size = %lu", - broad_cast_info.var_name.c_str(), broad_cast_info.broadcast_name.c_str(), broad_cast_info.idx, - broad_cast_info.input_offset, broad_cast_info.input_size, broad_cast_info.output_offset, - broad_cast_info.output_size); + "VarManager::SaveBroadCastInfo var_name = %s, broadcast name = %s, " + "idx = %d, input_offset = %ld, input_size = %lu, output_offset = %ld, " + "output_size = %lu", + broad_cast_info.var_name.c_str(), broad_cast_info.broadcast_name.c_str(), broad_cast_info.idx, + broad_cast_info.input_offset, broad_cast_info.input_size, broad_cast_info.output_offset, + broad_cast_info.output_size); if (var_resource_ == nullptr) { GELOGW("VarManager has not been init."); return ge::INTERNAL_ERROR; diff --git a/src/ge/graph/manager/graph_var_manager.h b/src/ge/graph/manager/graph_var_manager.h index c78f83db..a2b974e4 100644 --- a/src/ge/graph/manager/graph_var_manager.h +++ b/src/ge/graph/manager/graph_var_manager.h @@ -177,7 +177,6 @@ class MemResource { private: uint64_t total_size_; - uint8_t *var_mem_base_; uint64_t var_mem_size_; }; @@ -295,5 +294,5 @@ class VarManagerPool { std::mutex var_manager_mutex_; map var_manager_map_; }; -}; // namespace ge +}; // namespace ge #endif // GE_GRAPH_MANAGER_GRAPH_VAR_MANAGER_H_ diff --git a/src/ge/graph/manager/util/debug.cc b/src/ge/graph/manager/util/debug.cc old mode 100755 new mode 100644 index 67f9fdf0..96420638 --- a/src/ge/graph/manager/util/debug.cc +++ b/src/ge/graph/manager/util/debug.cc @@ -16,6 +16,8 @@ #include "graph/manager/util/debug.h" +#include + #include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" @@ -29,9 +31,10 @@ Debug::Debug() = default; Debug::~Debug() = default; void Debug::DumpProto(const Message &proto, const char *file) { - int fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + std::string file_path = RealPath(file); + int fd = open(file_path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (fd == -1) { - GELOGW("Write %s failed", file); + GELOGW("Write %s failed", file_path.c_str()); return; } auto output = ge::MakeShared(fd); @@ -52,10 +55,14 @@ void Debug::DumpProto(const Message &proto, const char *file) { } Status Debug::DumpDevMem(const char *file, const void *addr, uint32_t size) { + if (size == 0) { + GELOGI("Dump data failed because the size is 0."); + return SUCCESS; + } uint8_t *host_addr = 
nullptr; rtError_t ret = rtMallocHost(reinterpret_cast(&host_addr), size); if (ret != RT_ERROR_NONE) { - GELOGE(FAILED, "Call rt api rtMallocHost failed."); + GELOGE(FAILED, "Call rt api rtMallocHost failed, ret: 0x%X", ret); return FAILED; } GE_MAKE_GUARD_RTMEM(host_addr); diff --git a/src/ge/graph/manager/util/debug.h b/src/ge/graph/manager/util/debug.h old mode 100755 new mode 100644 diff --git a/src/ge/graph/optimize/graph_optimize.cc b/src/ge/graph/optimize/graph_optimize.cc index 8b698724..849ad296 100644 --- a/src/ge/graph/optimize/graph_optimize.cc +++ b/src/ge/graph/optimize/graph_optimize.cc @@ -18,7 +18,6 @@ #include -#include "cce/optimizer/fusion_engine.h" #include "framework/common/debug/ge_log.h" #include "graph/anchor.h" #include "graph/passes/dimension_adjust_pass.h" @@ -51,25 +50,24 @@ void AddNodeInputProperty(ComputeGraphPtr &compute_graph) { } for (ge::NodePtr &node : compute_graph->GetDirectNode()) { auto node_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return); + GE_IF_BOOL_EXEC(node_op_desc == nullptr, GELOGW("node_op_desc is nullptr!"); return ); auto in_control_anchor = node->GetInControlAnchor(); vector src_name_list; vector input_name_list; vector src_index_list; GE_IF_BOOL_EXEC( - in_control_anchor != nullptr, string src_name_temp; for (auto &out_control_anchor - : in_control_anchor->GetPeerOutControlAnchors()) { - ge::NodePtr src_node = out_control_anchor->GetOwnerNode(); - GE_IF_BOOL_EXEC(src_node == nullptr, GELOGW("src_node is nullptr!"); continue); - src_name_temp = src_name_temp == "" ? src_node->GetName() : src_name_temp + ":" + src_node->GetName(); - } GE_IF_BOOL_EXEC(src_name_temp != "", src_name_list.emplace_back(src_name_temp); - node_op_desc->SetSrcName(src_name_list);)) + in_control_anchor != nullptr, string src_name_temp; for (auto &out_control_anchor + : in_control_anchor->GetPeerOutControlAnchors()) { + ge::NodePtr src_node = out_control_anchor->GetOwnerNode(); + GE_IF_BOOL_EXEC(src_node == nullptr, GELOGW("src_node is nullptr!"); continue); + src_name_temp = src_name_temp == "" ? src_node->GetName() : src_name_temp + ":" + src_node->GetName(); + } GE_IF_BOOL_EXEC(src_name_temp != "", src_name_list.emplace_back(src_name_temp); + node_op_desc->SetSrcName(src_name_list);)) for (auto &in_data_anchor : node->GetAllInDataAnchors()) { auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); - GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, - GELOGW("peer_out_anchor is nullptr! node: %s", node->GetName().c_str()); - continue); + GE_IF_BOOL_EXEC( + peer_out_anchor == nullptr, GELOGW("peer_out_anchor is nullptr! node: %s", node->GetName().c_str()); continue); ge::NodePtr src_node = peer_out_anchor->GetOwnerNode(); src_name_list = node_op_desc->GetSrcName(); @@ -78,11 +76,11 @@ void AddNodeInputProperty(ComputeGraphPtr &compute_graph) { src_index_list.emplace_back(peer_out_anchor->GetIdx()); node_op_desc->SetSrcName(src_name_list); node_op_desc->SetSrcIndex(src_index_list); - GE_IF_BOOL_EXEC(!(node_op_desc->GetType() == NETOUTPUT && GetContext().type == domi::FMK_TYPE_T), + GE_IF_BOOL_EXEC(!(node_op_desc->GetType() == NETOUTPUT && domi::GetContext().type == domi::FMK_TYPE_T), ge::NodePtr peer_owner_node = peer_out_anchor->GetOwnerNode(); input_name_list = node_op_desc->GetInputName(); input_name_list.emplace_back( - peer_owner_node->GetName() + - (peer_out_anchor->GetIdx() == 0 ? 
"" : ": " + to_string(peer_out_anchor->GetIdx()))); + peer_owner_node->GetName() + + (peer_out_anchor->GetIdx() == 0 ? "" : ": " + to_string(peer_out_anchor->GetIdx()))); node_op_desc->SetInputName(input_name_list);) } } @@ -217,7 +215,7 @@ void GraphOptimize::TranFrameOp(ComputeGraphPtr &compute_graph) { GE_IF_BOOL_EXEC(op == nullptr, GELOGW("op is nullptr!"); continue); // fwkop black-white sheet vector::iterator iter = - std::find(local_framework_op_vec.begin(), local_framework_op_vec.end(), op->GetType()); + std::find(local_framework_op_vec.begin(), local_framework_op_vec.end(), op->GetType()); if (iter != local_framework_op_vec.end()) { // set - original_type if (!AttrUtils::SetStr(op, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, op->GetType())) { diff --git a/src/ge/graph/optimize/graph_optimize.h b/src/ge/graph/optimize/graph_optimize.h index e1e7a7c0..26b65b46 100644 --- a/src/ge/graph/optimize/graph_optimize.h +++ b/src/ge/graph/optimize/graph_optimize.h @@ -33,7 +33,9 @@ #include "graph/manager/graph_manager_utils.h" #include "omg/omg_inner_types.h" +/*lint -e148*/ namespace ge { +using ComputeGraphPtr = std::shared_ptr; using GraphOptimizerPtr = std::shared_ptr; class GraphOptimize { public: @@ -54,7 +56,7 @@ class GraphOptimize { const std::map> &GetSummaryOutputIndexes() const { return summary_output_indexes_; - } + } // lint !e1073 void ClearSummaryOutputIndexes() { summary_output_indexes_.clear(); } @@ -80,4 +82,5 @@ class GraphOptimize { std::string func_bin_path_; }; }; // namespace ge +/*lint +e148*/ #endif // GE_GRAPH_OPTIMIZE_GRAPH_OPTIMIZE_H_ diff --git a/src/ge/graph/optimize/summary_optimize.cc b/src/ge/graph/optimize/summary_optimize.cc index 058d5b8c..3347f042 100644 --- a/src/ge/graph/optimize/summary_optimize.cc +++ b/src/ge/graph/optimize/summary_optimize.cc @@ -77,7 +77,7 @@ Status GraphOptimize::HandleSummaryOp(ComputeGraphPtr &compute_graph) { del_nodes.emplace_back(node_ptr); } } - summary_output_indexes_.emplace(compute_graph->GetGraphID(), summary_output_indexes); + summary_output_indexes_.insert({compute_graph->GetGraphID(), summary_output_indexes}); // add output nodes for summary std::vector> out_nodes_info; diff --git a/src/ge/graph/partition/engine_place.cc b/src/ge/graph/partition/engine_place.cc index e9dc9a9d..eb8a0f11 100644 --- a/src/ge/graph/partition/engine_place.cc +++ b/src/ge/graph/partition/engine_place.cc @@ -15,12 +15,10 @@ */ #include "graph/partition/engine_place.h" - #include #include #include #include - #include "common/op/ge_op_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/op_desc_utils.h" @@ -73,7 +71,7 @@ Status EnginePlacer::AssignEngineAndLog(ge::ConstNodePtr node_ptr, const std::st } // private function, promise node_ptr->GetOpDesc() not null - GELOGI("Assigning DNNEngine %s to node %s, op type %s", engine_name.c_str(), node_ptr->GetName().c_str(), + GELOGD("Assigning DNNEngine %s to node %s, op type is %s", engine_name.c_str(), node_ptr->GetName().c_str(), node_ptr->GetOpDesc()->GetType().c_str()); // Record the node assigned engine name @@ -82,4 +80,3 @@ Status EnginePlacer::AssignEngineAndLog(ge::ConstNodePtr node_ptr, const std::st return SUCCESS; } } // namespace ge - diff --git a/src/ge/graph/partition/graph_partition.cc b/src/ge/graph/partition/graph_partition.cc index a6f48b54..2324c6e0 100644 --- a/src/ge/graph/partition/graph_partition.cc +++ b/src/ge/graph/partition/graph_partition.cc @@ -15,13 +15,11 @@ */ #include "graph/partition/graph_partition.h" - #include #include #include #include #include - 
#include "common/ge/ge_util.h" #include "common/op/ge_op_utils.h" #include "graph/debug/ge_attr_define.h" @@ -126,11 +124,11 @@ Status ge::GraphPartitioner::RemoveNodeAndEdgeBetweenEndPld(ge::ComputeGraphPtr if ((end != nullptr) && (pld != nullptr) && (end->GetInDataAnchor(0) != nullptr) && (pld->GetOutDataAnchor(0) != nullptr)) { AnchorPtr end_in_anchor = (end->GetInDataAnchor(0)->GetFirstPeerAnchor() == nullptr) - ? Anchor::DynamicAnchorCast(end->GetInControlAnchor()) - : Anchor::DynamicAnchorCast(end->GetInDataAnchor(0)); + ? Anchor::DynamicAnchorCast(end->GetInControlAnchor()) + : Anchor::DynamicAnchorCast(end->GetInDataAnchor(0)); AnchorPtr pld_out_anchor = (pld->GetOutDataAnchor(0)->GetFirstPeerAnchor() == nullptr) - ? Anchor::DynamicAnchorCast(pld->GetOutControlAnchor()) - : Anchor::DynamicAnchorCast(pld->GetOutDataAnchor(0)); + ? Anchor::DynamicAnchorCast(pld->GetOutControlAnchor()) + : Anchor::DynamicAnchorCast(pld->GetOutDataAnchor(0)); auto src_anchor = end_in_anchor->GetFirstPeerAnchor(); // src_anchor should be only 1 if (GraphUtils::RemoveEdge(src_anchor, end_in_anchor) != GRAPH_SUCCESS) { GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: RemoveEdge failed. node_name:%s, graph_name:%s", @@ -273,7 +271,6 @@ Status ge::GraphPartitioner::UpdateEndOpDesc(const NodePtr &dst_node, int input_ end_op_desc->MutableInputDesc(0)->SetDataType(input_desc.GetOriginDataType()); } else { GELOGI("Original data type of %s is undefined![data type is %s]", dst_node->GetName().c_str(), - TypeUtils::DataTypeToSerialString(input_desc.GetDataType()).c_str()); } // flush end format as original format @@ -399,7 +396,7 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr return SUCCESS; } -Status ge::GraphPartitioner::LinkInput2EndRemoveOriginalLink(ge::NodePtr input_node, ge::ComputeGraphPtr src_graph, +Status ge::GraphPartitioner::LinkInput2EndRemoveOrginalLink(ge::NodePtr input_node, ge::ComputeGraphPtr src_graph, ge::ComputeGraphPtr dst_graph) { if (input_node == nullptr || src_graph == nullptr || dst_graph == nullptr) { GELOGE(FAILED, "parameter ptr is null."); @@ -461,8 +458,8 @@ Status ge::GraphPartitioner::PutInputNodesInSubGraph(const ge::ComputeGraphPtr & GELOGE(FAILED, "[GraphPartitioner]: AddNode() failed."); return FAILED; } - if (LinkInput2EndRemoveOriginalLink(input_node, src_graph, dst_graph) != ge::SUCCESS) { - GELOGE(FAILED, "[GraphPartitioner]: LinkInput2EndRemoveOriginalLink() failed."); + if (LinkInput2EndRemoveOrginalLink(input_node, src_graph, dst_graph) != ge::SUCCESS) { + GELOGE(FAILED, "[GraphPartitioner]: LinkInput2EndRemoveOrginalLink() failed."); return FAILED; } } @@ -479,9 +476,8 @@ void ge::GraphPartitioner::AddNewGraphToPartition(ge::ComputeGraphPtr &input_gra } bool ge::GraphPartitioner::IsDataLike(ge::NodePtr node) { - return (node->GetType() == CONSTANT) || (node->GetType() == DATA) || - (node->GetType() == AIPPDATA) || (node->GetType() == CONSTANTOP) || - (node->GetType() == VARIABLE); + return (node->GetType() == CONSTANT) || (node->GetType() == DATA) || (node->GetType() == AIPPDATA) || + (node->GetType() == CONSTANTOP) || (node->GetType() == VARIABLE); } bool ge::GraphPartitioner::HasNoInput(ge::NodePtr node) { @@ -584,8 +580,6 @@ Status ge::GraphPartitioner::AddPartitionsToGraphNode(vectorSetInputFlag(sub_graph_input); sgi->SetOutputContext(output_name_); AddEndPldInformationToSubGraphInfo(sgi); - GELOGI("[GraphPartitioner]: subGraph engine name is %s, graph name is %s, stream label is %s", - engine_name.c_str(), 
subGraph->GetName().c_str(), sgi->GetStreamLabel().c_str()); output_subgraphs.push_back(sgi); } } @@ -601,7 +595,7 @@ bool ge::GraphPartitioner::IsMergeable(size_t parent_cluster, size_t child_clust // Check if parent_cluster,child_cluster has same engine or stream label if ((clusters_[parent_cluster]->engine_name_ != clusters_[child_cluster]->engine_name_) || (clusters_[parent_cluster]->stream_label_ != clusters_[child_cluster]->stream_label_)) { - GELOGI("Parent cluster %zu engine %s stream label %s, child cluster %zu engine %s stream label %s can not merge", + GELOGD("Parent cluster %zu engine %s stream label %s, child cluster %zu engine %s stream label %s can not merge", parent_cluster, clusters_[parent_cluster]->engine_name_.c_str(), clusters_[parent_cluster]->stream_label_.c_str(), child_cluster, clusters_[child_cluster]->engine_name_.c_str(), clusters_[child_cluster]->stream_label_.c_str()); @@ -611,7 +605,7 @@ bool ge::GraphPartitioner::IsMergeable(size_t parent_cluster, size_t child_clust RemoveEdge(parent_cluster, child_cluster); // Check if there is a path between parent and child, if return true, can not merge if (HasSecondPath(parent_cluster, child_cluster, upper_bound)) { - GELOGI("Find second path from %zu to %zu, upper bound is %zu", parent_cluster, child_cluster, upper_bound); + GELOGD("Find second path from %zu to %zu, upper bound is %zu", parent_cluster, child_cluster, upper_bound); InsertEdge(parent_cluster, child_cluster); return false; } @@ -926,7 +920,7 @@ Status ge::GraphPartitioner::SortSubGraphs(const ge::ComputeGraphPtr &compute_gr if (it != nullptr) { // rename subGraph based on rank string graph_name = - "partition" + std::to_string(partition_times_) + "_rank" + std::to_string(rank) + "_" + it->GetName(); + "partition" + std::to_string(partition_times_) + "_rank" + std::to_string(rank) + "_" + it->GetName(); it->SetName(graph_name); } rank++; diff --git a/src/ge/graph/partition/graph_partition.h b/src/ge/graph/partition/graph_partition.h index f6f58e47..3ac18e48 100644 --- a/src/ge/graph/partition/graph_partition.h +++ b/src/ge/graph/partition/graph_partition.h @@ -26,7 +26,6 @@ #include #include #include - #include "graph/compute_graph.h" #include "graph/manager/graph_manager_utils.h" #include "graph/operator_reg.h" @@ -40,7 +39,7 @@ using EdgeMap = std::set>; using ClusterSet = std::unordered_set; class Cluster { public: - size_t index_; // corresponding to rank of node + size_t index_; // corresponding to rank of node ClusterSet in_clu_; // inClusters index ClusterSet out_clu_; // outClusters index std::list nodes_; // including node of this cluster @@ -92,7 +91,7 @@ class GraphPartitioner { // add place holder and end node in src and dst graph graphStatus AddPlaceHolderEndInSrcDstGraph(const AnchorPtr &out_data_anchor, const AnchorPtr &peer_in_anchor, const ComputeGraphPtr &pld_graph, const ComputeGraphPtr &end_graph); - Status LinkInput2EndRemoveOriginalLink(NodePtr input_node, ComputeGraphPtr src_graph, ComputeGraphPtr dst_graph); + Status LinkInput2EndRemoveOrginalLink(NodePtr input_node, ComputeGraphPtr src_graph, ComputeGraphPtr dst_graph); /// After partition, put input nodes in srcGraph to dstGraph. 
Data will be linked to 'end'; /// the other end will be linked to 'placeholder' @@ -146,7 +145,7 @@ class GraphPartitioner { Mode mode_ = kPartitioning; uint32_t partition_times_ = 0; // times of call partition std::vector transfer_graph_; // contains all transfer graphs - std::unordered_map clusters_; // index to cluster ptr, contains all nodes + std::unordered_map clusters_; // index to cluster ptr, contains all nodes std::unordered_map> node_2_cluster_; // node map to cluster std::unordered_map, ComputeGraphPtr> cluster_2_partition_; // cluster map to subgraph }; diff --git a/src/ge/graph/passes/aicpu_constant_folding_pass.cc b/src/ge/graph/passes/aicpu_constant_folding_pass.cc index ecb6f1d3..24e58656 100644 --- a/src/ge/graph/passes/aicpu_constant_folding_pass.cc +++ b/src/ge/graph/passes/aicpu_constant_folding_pass.cc @@ -201,7 +201,7 @@ Status AicpuConstantFoldingPass::UpdateWorkSpaceAddr(string &task_info, STR_FWK_ void *workspace_addr = nullptr; GE_CHK_RT_RET(rtMalloc(&workspace_addr, task_info.size(), RT_MEMORY_HBM)); rtError_t rt_ret = - rtMemcpy(workspace_addr, task_info.size(), task_info.data(), task_info.size(), RT_MEMCPY_HOST_TO_DEVICE); + rtMemcpy(workspace_addr, task_info.size(), task_info.data(), task_info.size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(rt_ret, "rtMemcpy error"); GE_CHK_RT(rtFree(workspace_addr)); @@ -216,7 +216,7 @@ Status AicpuConstantFoldingPass::UpdateWorkSpaceAddr(string &task_info, STR_FWK_ Status AicpuConstantFoldingPass::UpdateInputAndOutputAddr(const vector &io_addrs, STR_FWK_OP_KERNEL &task) const { auto addrs_size = sizeof(uint64_t) * (io_addrs.size()); - if (addrs_size < 1) { + if (addrs_size <= 0) { GELOGE(FAILED, "addrs_size is less than 1 "); return FAILED; } @@ -326,12 +326,12 @@ Status AicpuConstantFoldingPass::LaunchSingleOpRunTask(const NodePtr &node, cons } std::function callback = [&]() { void *input_output_ptr = - reinterpret_cast(reinterpret_cast(aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr)); + reinterpret_cast(reinterpret_cast(aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr)); if (input_output_ptr != nullptr) { GE_CHK_RT(rtFree(input_output_ptr)); } void *workspace_addr_ptr = - reinterpret_cast(reinterpret_cast(aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr)); + reinterpret_cast(reinterpret_cast(aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr)); if (workspace_addr_ptr != nullptr) { GE_CHK_RT(rtFree(workspace_addr_ptr)); } @@ -384,12 +384,12 @@ Status AicpuConstantFoldingPass::LaunchMemCopyTask(const vector &data_ GE_CHK_RT(rtFree(reinterpret_cast(reinterpret_cast(item)))); // pointer cannot be null } void *input_output_ptr = - reinterpret_cast(reinterpret_cast(aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr)); + reinterpret_cast(reinterpret_cast(aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr)); if (input_output_ptr != nullptr) { GE_CHK_RT(rtFree(input_output_ptr)); } void *workspace_addr_ptr = - reinterpret_cast(reinterpret_cast(aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr)); + reinterpret_cast(reinterpret_cast(aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr)); if (workspace_addr_ptr != nullptr) { GE_CHK_RT(rtFree(workspace_addr_ptr)); } diff --git a/src/ge/graph/passes/aicpu_constant_folding_pass.h b/src/ge/graph/passes/aicpu_constant_folding_pass.h index bc495f5d..c27042fb 100644 --- a/src/ge/graph/passes/aicpu_constant_folding_pass.h +++ b/src/ge/graph/passes/aicpu_constant_folding_pass.h @@ -17,7 +17,6 @@ #ifndef 
GE_GRAPH_PASSES_AICPU_CONSTANT_FOLDING_PASS_H_ #define GE_GRAPH_PASSES_AICPU_CONSTANT_FOLDING_PASS_H_ -#include #include #include "common/opskernel/ops_kernel_info_store.h" diff --git a/src/ge/graph/passes/assert_pass.h b/src/ge/graph/passes/assert_pass.h index 528f6046..f8e35b32 100644 --- a/src/ge/graph/passes/assert_pass.h +++ b/src/ge/graph/passes/assert_pass.h @@ -24,7 +24,7 @@ namespace ge { class AssertPass : public BaseNodePass { public: - Status Run(NodePtr& node) override; + Status Run(NodePtr& node) override; /*lint !e148*/ private: /// /// @@ -33,7 +33,7 @@ class AssertPass : public BaseNodePass { /// @param nodes_unused nodes to be deleted /// @return void /// - void CollectUnusedNode(const NodePtr &assert_node, std::vector& nodes_unused); + void CollectUnusedNode(const NodePtr& assert_node, std::vector& nodes_unused); /*lint !e148*/ /// /// remove unused nodes from graph /// @@ -41,7 +41,7 @@ class AssertPass : public BaseNodePass { /// @param nodes_unused nodes to be deleted /// @return Status /// - Status RemoveUnusedNode(std::vector& nodes_unused); + Status RemoveUnusedNode(std::vector& nodes_unused); /*lint !e148*/ }; } // namespace ge #endif // GE_GRAPH_PASSES_ASSERT_PASS_H_ diff --git a/src/ge/graph/passes/atomic_addr_clean_pass.cc b/src/ge/graph/passes/atomic_addr_clean_pass.cc index 64315b9f..6ca1b98b 100644 --- a/src/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/src/ge/graph/passes/atomic_addr_clean_pass.cc @@ -18,13 +18,13 @@ #include #include -#include #include +#include #include -#include "common/ge/ge_util.h" -#include "common/ge_inner_error_codes.h" #include "framework/common/debug/ge_log.h" +#include "common/ge_inner_error_codes.h" +#include "common/ge/ge_util.h" #include "graph/debug/ge_attr_define.h" #include "init/gelib.h" @@ -155,7 +155,7 @@ NodePtr AtomicAddrCleanPass::InsertAtomicAddrCleanNode(ComputeGraphPtr &graph) { Status AtomicAddrCleanPass::LinkToAtomicNode(const NodePtr &atomic_node, NodePtr &atomic_clean_node) { GE_IF_BOOL_EXEC(atomic_node == nullptr || atomic_clean_node == nullptr, - GELOGE(PARAM_INVALID, "param [atomic_node][atomic_clean_node] must not be null."); + GE_LOGE("param [atomic_node][atomic_clean_node] must not be null."); return PARAM_INVALID); InControlAnchorPtr in_ctrl_anchor = atomic_node->GetInControlAnchor(); OutControlAnchorPtr out_ctrl_anchor = atomic_clean_node->GetOutControlAnchor(); @@ -205,7 +205,7 @@ bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) { in_data_anchor->GetPeerOutAnchor()->GetOwnerNode() != nullptr) { auto peer_in_node = in_data_anchor->GetPeerOutAnchor()->GetOwnerNode(); if (peer_in_node->GetType() == DATA) { - (void)AttrUtils::SetBool(peer_in_node->GetOpDesc(), "_need_memset", true); + (void)AttrUtils::SetBool(peer_in_node->GetOpDesc(), "_need_memset", true); // no need return GELOGI("Recognized atomic op %s from HCCL engine and input is DATA.", op_desc->GetName().c_str()); return false; } } diff --git a/src/ge/graph/passes/atomic_addr_clean_pass.h b/src/ge/graph/passes/atomic_addr_clean_pass.h index be7b30fe..a4dd2e72 100644 --- a/src/ge/graph/passes/atomic_addr_clean_pass.h +++ b/src/ge/graph/passes/atomic_addr_clean_pass.h @@ -23,43 +23,54 @@ #include "inc/graph_pass.h" namespace ge { +/* + * Atomic addr clean task fusion + * Find all atomic ops in the graph and insert one AtomicAddrClean op + * to clean atomic output and workspace once for all.
+ * Before each iteration starts, empty the atomic ops' output and workspace memory + * op1 op1 + * | | + * op2(atomic) ==> op2 + * | | \ + * op3(atomic) op3 -AtomicClean + */ class AtomicAddrCleanPass : public GraphPass { public: Status Run(ComputeGraphPtr graph); private: - /// - /// HandleLoopGraph - /// @param graph - /// @return - /// + /** + * HandleLoopGraph + * @param graph + * @return + */ Status HandleLoopGraph(ComputeGraphPtr &graph, const vector &atomic_node_vec); - /// - /// HandleNormalGraph - /// @param graph - /// @return - /// + /** + * HandleNormalGraph + * @param graph + * @return + */ Status HandleNormalGraph(ComputeGraphPtr &graph, const vector &atomic_node_vec); - /// - /// Insert atomic clean node to graph - /// @param graph - /// @return - /// + /** + * Insert atomic clean node to graph + * @param graph + * @return + */ NodePtr InsertAtomicAddrCleanNode(ComputeGraphPtr &graph); - /// - /// Link control anchor from atomic clean node to atomic node - /// @param atomic_node - /// @param atomic_clean_node - /// @return - /// + /** + * Link control anchor from atomic clean node to atomic node + * @param atomic_node + * @param atomic_clean_node + * @return + */ Status LinkToAtomicNode(const NodePtr &atomic_node, NodePtr &atomic_clean_node); - /// - /// Check if this node is atomic op. - /// @param node - /// @return - /// + /** + * Check if this node is atomic op. + * @param node + * @return + */ bool IsAtomicOp(const NodePtr &node); vector hcom_node_vec_; diff --git a/src/ge/graph/passes/compile_nodes_pass.cc b/src/ge/graph/passes/compile_nodes_pass.cc index dfddc6ce..35d17523 100644 --- a/src/ge/graph/passes/compile_nodes_pass.cc +++ b/src/ge/graph/passes/compile_nodes_pass.cc @@ -19,11 +19,11 @@ #include #include -#include "framework/common/debug/ge_log.h" -#include "common/ge_inner_error_codes.h" #include "common/ge/ge_util.h" -#include "graph/op_desc.h" +#include "common/ge_inner_error_codes.h" +#include "framework/common/debug/ge_log.h" #include "graph/debug/ge_attr_define.h" +#include "graph/op_desc.h" namespace { const char *const kAICPUEngineName = "DNN_VM_AICPU"; @@ -31,54 +31,63 @@ const char *const kAICPUKernelLibName = "aicpu_kernel"; } // namespace namespace ge { -graphStatus CompileNodesPass::CompileOp(NodePtr node, - const std::shared_ptr &instance, - const string &kernel_lib_name) { - GE_CHECK_NOTNULL(node); - GE_CHECK_NOTNULL(instance); - OpsKernelInfoStorePtr kernel_info = instance->OpsKernelManagerObj().GetOpsKernelInfoStore(kernel_lib_name); - if (kernel_info == nullptr) { - GELOGE(ge::GE_GRAPH_PARAM_NULLPTR, "Get op %s ops kernel info store failed", node->GetName().c_str()); - return ge::GE_GRAPH_PARAM_NULLPTR; +graphStatus CompileNodesPass::Run(ComputeGraphPtr graph) { + GELOGI("[CompileNodesPass]: optimize begin."); + if (graph == nullptr) { + return GRAPH_SUCCESS; } - - // check if support - auto op_desc = node->GetOpDesc(); - auto ge_desc = MakeShared(op_desc); - if (ge_desc == nullptr) { - GELOGE(GE_GRAPH_MEMORY_ALLOC_FAILED, "Fail to malloc op desc."); - return FAILED; + std::shared_ptr instance = ge::GELib::GetInstance(); + if (instance == nullptr || !instance->InitFlag()) { + GELOGE(ge::GE_CLI_GE_NOT_INITIALIZED, "Run CompileNodesPass failed."); + return ge::GE_CLI_GE_NOT_INITIALIZED; } - string reason; - if (!(kernel_info->CheckAccuracySupported(*ge_desc, reason, true))) { - GELOGW("Check Accuracy Supported failed, go to aicpu engine, node name is %s, reason: %s", node->GetName().c_str(), - reason.c_str()); - op_desc->SetOpEngineName(kAICPUEngineName); -
op_desc->SetOpKernelLibName(kAICPUKernelLibName); - } else { - // TBE compile op - vector node_vec = {node}; - auto ret = kernel_info->CompileOp(node_vec); - if (ret != ge::SUCCESS) { - GELOGE(ret, "Compile single op failed, node name is %s", node->GetName().c_str()); - return GRAPH_FAILED; + std::unordered_map> kernel_to_compile_nodes; + for (auto &node : graph->GetAllNodes()) { + if (node == nullptr) { + continue; + } + auto op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + continue; + } + auto node_need_compile = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NEED_COMPILE, node_need_compile); + if (!node_need_compile) { + continue; + } + // collect all supported compile nodes + string kernel_lib_name; + auto ret = GetSupportedKernel(node, instance, kernel_lib_name); + if (ret == GRAPH_SUCCESS) { + auto iter = kernel_to_compile_nodes.find(kernel_lib_name); + if (iter != kernel_to_compile_nodes.end()) { + iter->second.emplace_back(node); + } else { + std::vector node_vec{node}; + kernel_to_compile_nodes.insert(std::make_pair(kernel_lib_name, node_vec)); + } } } - + // compile nodes by kernel lib, currently only the TBE kernel + auto result = CompileNodes(instance, kernel_to_compile_nodes); + if (result != GRAPH_SUCCESS) { + GELOGE(result, "Compile op failed."); + return result; + } + GELOGI("[CompileNodesPass]: Optimize success."); return GRAPH_SUCCESS; } -graphStatus CompileNodesPass::CompileNode(const NodePtr &node, const std::shared_ptr &instance) { - GE_CHECK_NOTNULL(node); - GE_CHECK_NOTNULL(instance); +graphStatus CompileNodesPass::GetSupportedKernel(const NodePtr &node, const std::shared_ptr instance, + string &kernel_lib_name) { auto op_desc = node->GetOpDesc(); if (op_desc == nullptr) { GELOGE(ge::GE_GRAPH_PARAM_NULLPTR, "Get op %s opdesc failed", node->GetName().c_str()); return ge::GE_GRAPH_PARAM_NULLPTR; } - string kernel_lib_name = op_desc->GetOpKernelLibName(); + // reset op kernel lib, find supported kernel + kernel_lib_name = op_desc->GetOpKernelLibName(); if (kernel_lib_name.empty()) { - // reset op kernel lib (void)instance->DNNEngineManagerObj().GetDNNEngineName(op_desc); kernel_lib_name = op_desc->GetOpKernelLibName(); if (kernel_lib_name.empty()) { @@ -87,43 +96,61 @@ graphStatus CompileNodesPass::CompileNode(const NodePtr &node, const std::shared return GRAPH_FAILED; } } + OpsKernelInfoStorePtr kernel_info = instance->OpsKernelManagerObj().GetOpsKernelInfoStore(kernel_lib_name); - return CompileOp(node, instance, kernel_lib_name); + if (kernel_info == nullptr) { + GELOGE(ge::GE_GRAPH_PARAM_NULLPTR, "Get op %s ops kernel info store failed", node->GetName().c_str()); + return ge::GE_GRAPH_PARAM_NULLPTR; + } + // begin accuracy supported check + if (!CheckAccuracySupport(kernel_info, instance, op_desc)) { + // if check accuracy support failed, try to go to aicpu engine + kernel_lib_name = kAICPUKernelLibName; + } + return GRAPH_SUCCESS; } -graphStatus CompileNodesPass::Run(ComputeGraphPtr graph) { - GELOGI("[CompileNodesPass]: optimize begin."); - if (graph == nullptr) { - return GRAPH_SUCCESS; +bool CompileNodesPass::CheckAccuracySupport(const OpsKernelInfoStorePtr &kernel_info, + const std::shared_ptr instance, OpDescPtr &op_desc) { + auto ge_desc = MakeShared(op_desc); + if (ge_desc == nullptr) { + GELOGE(GE_GRAPH_MEMORY_ALLOC_FAILED, "Fail to malloc op desc."); + return false; } - - std::shared_ptr instance = ge::GELib::GetInstance(); - if (instance == nullptr || !instance->InitFlag()) { - GELOGE(ge::GE_CLI_GE_NOT_INITIALIZED, "Run 
CompileNodesPass failed."); - return ge::GE_CLI_GE_NOT_INITIALIZED; + string reason; + if (!(kernel_info->CheckAccuracySupported(*ge_desc, reason, true))) { + GELOGW("Check Accuracy Supported returned not supported, node name is %s, reason: %s. Try to go to AICPU engine.", + op_desc->GetName().c_str(), reason.c_str()); + return false; } + return true; +} - for (auto &node : graph->GetAllNodes()) { - if (node == nullptr) { - continue; +graphStatus CompileNodesPass::CompileNodes(const std::shared_ptr instance, + std::unordered_map> &kernel_to_compile_nodes) { + // compile nodes, if kernel is aicpu, check support and set engine info. + OpsKernelInfoStorePtr kernel_info; + for (auto &kernel_nodes : kernel_to_compile_nodes) { + kernel_info = instance->OpsKernelManagerObj().GetOpsKernelInfoStore(kernel_nodes.first); + if (kernel_info == nullptr) { + GELOGE(ge::GE_GRAPH_PARAM_NULLPTR, "Get op %s ops kernel info store failed", kernel_nodes.first.c_str()); + return ge::GE_GRAPH_PARAM_NULLPTR; } - auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - continue; - } - auto node_need_compile = false; - (void) ge::AttrUtils::GetBool(op_desc, ATTR_NEED_COMPILE, node_need_compile); - if (!node_need_compile) { + string reason; + if (kernel_nodes.first == kAICPUKernelLibName) { + for (auto node : kernel_nodes.second) { + // this node will go to aicpu engine, no need to compile + node->GetOpDesc()->SetOpEngineName(kAICPUEngineName); + node->GetOpDesc()->SetOpKernelLibName(kAICPUKernelLibName); + } continue; } - - auto ret = CompileNode(node, instance); + auto ret = kernel_info->CompileOp(kernel_nodes.second); if (ret != GRAPH_SUCCESS) { - return ret; + GELOGE(ret, "Compile op failed, kernel name is %s", kernel_nodes.first.c_str()); + return GRAPH_FAILED; } } - - GELOGI("[CompileNodesPass]: Optimize success."); return GRAPH_SUCCESS; } } // namespace ge diff --git a/src/ge/graph/passes/compile_nodes_pass.h b/src/ge/graph/passes/compile_nodes_pass.h index cd5622ed..56df7b87 100644 --- a/src/ge/graph/passes/compile_nodes_pass.h +++ b/src/ge/graph/passes/compile_nodes_pass.h @@ -19,8 +19,8 @@ #include #include -#include "init/gelib.h" #include "inc/graph_pass.h" +#include "init/gelib.h" namespace ge { /// @@ -32,9 +32,13 @@ class CompileNodesPass : public GraphPass { virtual ~CompileNodesPass() {} graphStatus Run(ComputeGraphPtr graph) override; + private: - graphStatus CompileNode(const NodePtr &node, const std::shared_ptr &instance); - graphStatus CompileOp(NodePtr node, const std::shared_ptr &instance, const string &kernel_lib_name); + graphStatus GetSupportedKernel(const NodePtr &node, const std::shared_ptr instance, string &kernel_lib_name); + bool CheckAccuracySupport(const OpsKernelInfoStorePtr &kernel_info, const std::shared_ptr instance, + OpDescPtr &op_desc); + graphStatus CompileNodes(const std::shared_ptr instance, + std::unordered_map> &kernel_to_compile_nodes); }; } // namespace ge diff --git a/src/ge/graph/passes/constant_fuse_same_pass.cc b/src/ge/graph/passes/constant_fuse_same_pass.cc index 7bf46947..69726e5d 100644 --- a/src/ge/graph/passes/constant_fuse_same_pass.cc +++ b/src/ge/graph/passes/constant_fuse_same_pass.cc @@ -22,10 +22,10 @@ #include #include -#include "common/ge/ge_util.h" +#include "graph/debug/ge_attr_define.h" #include "framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" -#include "graph/debug/ge_attr_define.h" +#include "common/ge/ge_util.h" #include "graph/utils/op_desc_utils.h" #include "graph/utils/type_utils.h" diff --git 
a/src/ge/graph/passes/control_op_attr_pass.cc b/src/ge/graph/passes/control_op_attr_pass.cc index a10d21e6..7afa34a4 100644 --- a/src/ge/graph/passes/control_op_attr_pass.cc +++ b/src/ge/graph/passes/control_op_attr_pass.cc @@ -30,7 +30,6 @@ #include "graph/utils/graph_utils.h" #include "init/gelib.h" - namespace { const uint32_t kMaxNodeNum = 350; } // namespace diff --git a/src/ge/graph/passes/control_trigger_pass.h b/src/ge/graph/passes/control_trigger_pass.h index 2d7182dd..39ee515d 100644 --- a/src/ge/graph/passes/control_trigger_pass.h +++ b/src/ge/graph/passes/control_trigger_pass.h @@ -25,15 +25,7 @@ #include "inc/graph_pass.h" namespace ge { -enum ControlNodeType { - kNotControlOp, - kCondSwitch, - kCondMerge, - kLoopSwitchT, - kLoopSwitchF, - kEnter, - kInvalidType -}; +enum ControlNodeType { kNotControlOp, kCondSwitch, kCondMerge, kLoopSwitchT, kLoopSwitchF, kEnter, kInvalidType }; class ControlTriggerPass : public GraphPass { public: @@ -57,4 +49,4 @@ class ControlTriggerPass : public GraphPass { std::unordered_map>> control_trigger_map_; }; } // namespace ge -#endif // GE_GRAPH_PASSES_CONTROL_TRIGGER_PASS_H_ +#endif // GE_GRAPH_PASSES_CONTROL_TRIGGER_PASS_H_ \ No newline at end of file diff --git a/src/ge/graph/passes/flow_ctrl_pass.cc b/src/ge/graph/passes/flow_ctrl_pass.cc index c027eb9f..0b72c806 100644 --- a/src/ge/graph/passes/flow_ctrl_pass.cc +++ b/src/ge/graph/passes/flow_ctrl_pass.cc @@ -20,10 +20,10 @@ #include #include -#include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" -#include "graph/common/omg_util.h" #include "graph/debug/ge_attr_define.h" +#include "graph/common/omg_util.h" +#include "common/ge/ge_util.h" #include "graph/manager/graph_var_manager.h" #include "graph/passes/pass_utils.h" @@ -32,6 +32,12 @@ namespace ge { Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) { GE_CHECK_NOTNULL(compute_graph); + + if (AddGlobalStepVariableNode(compute_graph) != SUCCESS) { + GELOGE(FAILED, "Add global step variable node fail."); + return FAILED; + } + if (!PassUtils::IsNeedTrainIteFlowCtrl(compute_graph)) { GELOGI("No need FlowCtrl"); return NOT_CHANGED; @@ -109,7 +115,7 @@ NodePtr FlowCtrlPass::InsertOp(ComputeGraphPtr &compute_graph, const string &nod } } - GE_IF_BOOL_EXEC(compute_graph == nullptr, GELOGE(FAILED, "compute_graph is nullptr"); return nullptr); + GE_IF_BOOL_EXEC(compute_graph == nullptr, GE_LOGE("compute_graph is nullptr"); return nullptr); NodePtr node = compute_graph->AddNode(op_desc); if (node == nullptr) { GELOGE(FAILED, "add node failed, name:%s, type:%s.", node_name.c_str(), node_type.c_str()); @@ -153,14 +159,14 @@ NodePtr FlowCtrlPass::InsertStreamSwitchOp(ComputeGraphPtr &compute_graph, const // stream switch op need switch cond by attr. 
GE_IF_BOOL_EXEC( !AttrUtils::SetInt(stream_switch->GetOpDesc(), ATTR_NAME_STREAM_SWITCH_COND, static_cast(RT_LESS)), - GELOGE(FAILED, "set ATTR_NAME_STREAM_SWITCH_COND failed"); + GE_LOGE("set ATTR_NAME_STREAM_SWITCH_COND failed"); return nullptr); return stream_switch; } NodePtr FlowCtrlPass::AddVariableNode(ComputeGraphPtr &compute_graph, const string &name) { - GE_IF_BOOL_EXEC(compute_graph == nullptr, GELOGE(FAILED, "compute_graph is nullptr"); return nullptr); + GE_IF_BOOL_EXEC(compute_graph == nullptr, GE_LOGE("compute_graph is nullptr"); return nullptr); NodePtr exist_node = compute_graph->FindNode(name); if (exist_node != nullptr) { GELOGD("Node %s already exist, no need add.", name.c_str()); @@ -182,6 +188,37 @@ NodePtr FlowCtrlPass::AddVariableNode(ComputeGraphPtr &compute_graph, const stri return InsertOp(compute_graph, VARIABLE, name, input_desc_list, output_desc_list); } +Status FlowCtrlPass::AddGlobalStepVariableNode(ComputeGraphPtr &compute_graph) { + NodePtr output_node = compute_graph->FindNode(NODE_NAME_NET_OUTPUT); + if (output_node == nullptr) { + GELOGD("Node %s can't be found in graph %u", NODE_NAME_NET_OUTPUT.c_str(), compute_graph->GetGraphID()); + return SUCCESS; + } + NodePtr exist_node = compute_graph->FindNode(NODE_NAME_GLOBAL_STEP); + if (exist_node != nullptr) { + GELOGD("Node %s already exist, no need add.", NODE_NAME_GLOBAL_STEP.c_str()); + return SUCCESS; + } + // set global step tensor desc + GeTensorDesc tensor_desc(GeShape({1}), FORMAT_ND, DT_UINT64); + std::vector input_desc_list = {}; + std::vector output_desc_list = {tensor_desc}; + NodePtr global_step = InsertOp(compute_graph, VARIABLE, NODE_NAME_GLOBAL_STEP, input_desc_list, output_desc_list); + if (global_step == nullptr) { + GELOGE(FAILED, "Add global_step node failed, global_step is null."); + return FAILED; + } + + // add ctrl edges + graphStatus add_ret = GraphUtils::AddEdge(global_step->GetOutControlAnchor(), output_node->GetInControlAnchor()); + if (add_ret != GRAPH_SUCCESS) { + GELOGE(FAILED, "Add global_step to netoutput edge failed, add_ret=%u.", add_ret); + return FAILED; + } + GELOGD("Add global_step to netoutput edge in graph %u success", compute_graph->GetGraphID()); + return SUCCESS; +} + NodePtr FlowCtrlPass::InsertAssignOp(ge::ComputeGraphPtr &compute_graph, const string &node_type, const string &node_name, const NodePtr &ref_node, const NodePtr &value_node) { GE_IF_BOOL_EXEC(ref_node == nullptr || value_node == nullptr || ref_node->GetOpDesc() == nullptr || @@ -247,7 +284,7 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co } GE_CHK_STATUS_RET(SetStreamLabel(active_node, active_name), "set stream label failed"); GE_IF_BOOL_EXEC(!AttrUtils::SetBool(active_node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, true), - GELOGE(FAILED, "set ATTR_NAME_IS_LOOP_ACTIVE failed"); + GE_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); return FAILED); // add ctrl edges @@ -300,7 +337,7 @@ Status FlowCtrlPass::CreateIterCtrlFalseBranch(ComputeGraphPtr &compute_graph, c } Status FlowCtrlPass::AddFpBpIteratorCtrl(ComputeGraphPtr &compute_graph, NodePtr &pre_node) { - GE_IF_BOOL_EXEC(pre_node == nullptr, GELOGE(FAILED, "pre_node is nullptr"); return FAILED); + GE_IF_BOOL_EXEC(pre_node == nullptr, GE_LOGE("pre_node is nullptr"); return FAILED); string pre_node_name = pre_node->GetName(); GELOGI("Add FpBp Iterator ctrl, pre node:%s.", pre_node_name.c_str()); // 1. 
Get or add variables @@ -376,7 +413,7 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, * itersPerLoop loopCond */ GE_IF_BOOL_EXEC(loop_after_node == nullptr || compute_graph == nullptr, - GELOGE(FAILED, "loop after node or compute graph is null"); + GE_LOGE("loop after node or compute graph is null"); return FAILED); InDataAnchorPtr in_anchor = loop_after_node->GetInDataAnchor(0); if (in_anchor == nullptr || in_anchor->GetPeerOutAnchor() == nullptr) { @@ -398,7 +435,7 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, } // 2. Add StreamSwitch and edges to switch_node. - GE_IF_BOOL_EXEC(loop_pre_node == nullptr, GELOGE(FAILED, "loop pre node is null"); return FAILED); + GE_IF_BOOL_EXEC(loop_pre_node == nullptr, GE_LOGE("loop pre node is null"); return FAILED); string switch_name = loop_pre_node->GetName() + "_" + NODE_NAME_STREAM_SWITCH; NodePtr switch_node = InsertStreamSwitchOp(compute_graph, switch_name, loop_cond_node, iter_per_loop_node); if (switch_node == nullptr) { @@ -432,7 +469,7 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, GE_CHK_STATUS_RET(SetStreamLabel(active_node, active_name), "set stream label failed"); GE_IF_BOOL_EXEC(!AttrUtils::SetBool(active_node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, true), - GELOGE(FAILED, "set ATTR_NAME_IS_LOOP_ACTIVE failed"); + GE_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); return FAILED); add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), active_node->GetInControlAnchor()); diff --git a/src/ge/graph/passes/flow_ctrl_pass.h b/src/ge/graph/passes/flow_ctrl_pass.h index 4394d027..a928aaa7 100644 --- a/src/ge/graph/passes/flow_ctrl_pass.h +++ b/src/ge/graph/passes/flow_ctrl_pass.h @@ -85,16 +85,13 @@ class FlowCtrlPass : public GraphPass { NodePtr AddVariableNode(ComputeGraphPtr &compute_graph, const string &name); /// - /// insert GlobalStepAssignAdd to graph. - /// just for ME, please remove when ME do itself. + /// add global step variable node to graph. + /// if the variable already exists, do nothing. /// @param compute_graph graph - /// @param pre_node pre node - /// @param global_step global step node - /// @param loop_increment_node loop increment node - /// @return the GlobalStepAssignAdd node. if insert failed return nullptr. + /// @return SUCCESS: done successfully + /// Other: failed /// - NodePtr InsertGlobalStepAssignAddOp(ComputeGraphPtr &compute_graph, NodePtr &pre_node, const NodePtr &global_step, - const NodePtr &loop_increment_node); + Status AddGlobalStepVariableNode(ComputeGraphPtr &compute_graph); /// /// create switch true branch for big cycle. 
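The AddGlobalStepVariableNode step introduced in the flow_ctrl_pass hunks above is easy to misread in diff form, so a condensed restatement may help: the new step is idempotent (it returns SUCCESS when the graph has no NetOutput node or when the global-step variable already exists), and it attaches the variable to NetOutput through a control edge only, so the variable is scheduled before the graph's outputs are produced without becoming one of NetOutput's data inputs. The C++ sketch below is an illustration of that pattern, not the committed code; it reuses only names visible in the hunks above (ComputeGraphPtr, GeTensorDesc, InsertOp, GraphUtils::AddEdge, NODE_NAME_GLOBAL_STEP, NODE_NAME_NET_OUTPUT), and the wrapper name AddGlobalStepVariableNodeSketch is hypothetical.

// Condensed sketch of the control-edge wiring added in FlowCtrlPass (illustrative only).
Status AddGlobalStepVariableNodeSketch(ge::ComputeGraphPtr &compute_graph) {
  ge::NodePtr output_node = compute_graph->FindNode(NODE_NAME_NET_OUTPUT);
  if (output_node == nullptr) {
    return SUCCESS;  // no NetOutput in this graph, nothing to anchor the variable to
  }
  if (compute_graph->FindNode(NODE_NAME_GLOBAL_STEP) != nullptr) {
    return SUCCESS;  // variable already present, keep the pass idempotent
  }
  // A one-element uint64 variable: no inputs, a single ND-format output of shape {1}.
  ge::GeTensorDesc tensor_desc(ge::GeShape({1}), ge::FORMAT_ND, ge::DT_UINT64);
  std::vector<ge::GeTensorDesc> input_desc_list = {};
  std::vector<ge::GeTensorDesc> output_desc_list = {tensor_desc};
  ge::NodePtr global_step =
      InsertOp(compute_graph, VARIABLE, NODE_NAME_GLOBAL_STEP, input_desc_list, output_desc_list);
  if (global_step == nullptr) {
    return FAILED;
  }
  // Control edge only: the variable must be materialized before the outputs are
  // produced, but it contributes no data to NetOutput.
  if (ge::GraphUtils::AddEdge(global_step->GetOutControlAnchor(),
                              output_node->GetInControlAnchor()) != GRAPH_SUCCESS) {
    return FAILED;
  }
  return SUCCESS;
}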
diff --git a/src/ge/graph/passes/folding_kernel/add_kernel.cc b/src/ge/graph/passes/folding_kernel/add_kernel.cc index 5d619798..7fa79373 100644 --- a/src/ge/graph/passes/folding_kernel/add_kernel.cc +++ b/src/ge/graph/passes/folding_kernel/add_kernel.cc @@ -50,6 +50,7 @@ const size_t kAddOutputSize = 1; } \ break; \ } + } // namespace template diff --git a/src/ge/graph/passes/folding_kernel/add_kernel.h b/src/ge/graph/passes/folding_kernel/add_kernel.h index d56898fc..23318c82 100644 --- a/src/ge/graph/passes/folding_kernel/add_kernel.h +++ b/src/ge/graph/passes/folding_kernel/add_kernel.h @@ -22,17 +22,16 @@ #include "inc/kernel.h" namespace ge { -class AddKernel: public Kernel { +class AddKernel : public Kernel { public: template bool OverflowCheck(const T &x, const T &y, DataType data_type); template - Status BCastAdd(const OpDescPtr &op_desc_ptr, - const std::vector &input, + Status BCastAdd(const OpDescPtr &op_desc_ptr, const std::vector &input, std::vector &v_output); - Status Compute(const ge::OpDescPtr op_desc_ptr, - const std::vector &input, std::vector &v_output) override; + Status Compute(const ge::OpDescPtr op_desc_ptr, const std::vector &input, + std::vector &v_output) override; }; } // namespace ge -#endif // GE_GRAPH_PASSES_FOLDING_KERNEL_ADD_KERNEL_H_ +#endif // GE_GRAPH_PASSES_FOLDING_KERNEL_ADD_KERNEL_H_ \ No newline at end of file diff --git a/src/ge/graph/passes/folding_kernel/concat_offset_kernel.cc b/src/ge/graph/passes/folding_kernel/concat_offset_kernel.cc index e734b9dd..fb95e3cf 100644 --- a/src/ge/graph/passes/folding_kernel/concat_offset_kernel.cc +++ b/src/ge/graph/passes/folding_kernel/concat_offset_kernel.cc @@ -97,4 +97,4 @@ Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector(sizeof(T) * data_num)); return MEMALLOC_FAILED; } - GE_IF_BOOL_EXEC(output->SetData(reinterpret_cast(buf.get()), static_cast(data_num * sizeof(T))) != - GRAPH_SUCCESS, - GELOGE(INTERNAL_ERROR, "set data failed"); - return INTERNAL_ERROR); + GE_IF_BOOL_EXEC( + output->SetData(reinterpret_cast(buf.get()), static_cast(data_num * sizeof(T))) != GRAPH_SUCCESS, + GELOGE(INTERNAL_ERROR, "set data failed"); + return INTERNAL_ERROR); Status ret = SUCCESS; switch (axis) { @@ -316,7 +316,7 @@ Status GatherV2Kernel::Check(const OpDescPtr &op_desc_ptr, const vectorGetData().size() == 0) || (tensor1->GetData().size() == 0) || (tensor2->GetData().size() == 0)); + ((tensor0->GetData().size() == 0) || (tensor1->GetData().size() == 0) || (tensor2->GetData().size() == 0)); if (size_is_zero) { GELOGE(NOT_CHANGED, "some input size is zero."); return NOT_CHANGED; @@ -385,8 +385,8 @@ Status GatherV2Kernel::Compute(const OpDescPtr op_desc_ptr, const vectorGetTensorDesc().GetDataType(); int64_t axis = axis_data_type == DT_INT32 - ? *(const_cast(reinterpret_cast(tensor2->GetData().data()))) - : *(const_cast(reinterpret_cast(tensor2->GetData().data()))); + ? *(const_cast(reinterpret_cast(tensor2->GetData().data()))) + : *(const_cast(reinterpret_cast(tensor2->GetData().data()))); axis = axis >= 0 ? 
axis : axis + x_shape.GetDimNum(); // check axis value if (axis < 0 || (axis + 1) > static_cast(x_shape.GetDimNum())) { diff --git a/src/ge/graph/passes/folding_kernel/mul_kernel.cc b/src/ge/graph/passes/folding_kernel/mul_kernel.cc index f328fb88..0c33ae53 100644 --- a/src/ge/graph/passes/folding_kernel/mul_kernel.cc +++ b/src/ge/graph/passes/folding_kernel/mul_kernel.cc @@ -45,14 +45,14 @@ Status IsOverflow(T const &a, T const &b, DataType &type) { } } -#define DEFINE_FUNC_WITH_STATUS_BY_TYPE(TYPE) \ - std::function func_##TYPE = []( \ - TYPE const &a, TYPE const &b, DataType &type, Status &ret) -> TYPE { \ - ret = IsOverflow(a, b, type); \ - if (ret != SUCCESS) { \ - return static_cast(0); \ - } \ - return a * b; \ +#define DEFINE_FUNC_WITH_STATUS_BY_TYPE(TYPE) \ + std::function func_##TYPE = \ + [](TYPE const &a, TYPE const &b, DataType &type, Status &ret) -> TYPE { \ + ret = IsOverflow(a, b, type); \ + if (ret != SUCCESS) { \ + return static_cast(0); \ + } \ + return a * b; \ }; #define SET_BCAST_COMPUTE_CASE(DTYPE, TYPE) \ @@ -64,6 +64,7 @@ Status IsOverflow(T const &a, T const &b, DataType &type) { case DTYPE: \ (void)output_ptr->SetData(reinterpret_cast(y_data_##TYPE.data()), y_data_##TYPE.size() * length); \ break; +// [no need to check result] DEFINE_FUNC_WITH_STATUS_BY_TYPE(int32_t) DEFINE_FUNC_WITH_STATUS_BY_TYPE(uint32_t) } // namespace diff --git a/src/ge/graph/passes/folding_kernel/pack_kernel.cc b/src/ge/graph/passes/folding_kernel/pack_kernel.cc index 706d2211..c79acd76 100644 --- a/src/ge/graph/passes/folding_kernel/pack_kernel.cc +++ b/src/ge/graph/passes/folding_kernel/pack_kernel.cc @@ -138,7 +138,7 @@ Status PackKernel::ValidateInputs(const ge::OpDescPtr &op_desc_ptr, const std::v } } if (!formats::IsShapeEqual(shape, dst_shape)) { - GELOGW("Shape of input %ld is not equal with input 0.", i); + GELOGW("Shape of input %ld is not equal with input 0.", i); return NOT_CHANGED; } } @@ -165,8 +165,7 @@ void PackKernel::ExpandDims(const int64_t axis, const std::vector &input, +Status PackKernel::CopyOutputData(const GeShape &final_shape, const std::vector &input, ge::GeTensorPtr &output_ptr) { int64_t times = 1; int64_t unit = 1; @@ -197,8 +196,8 @@ Status PackKernel::CopyOutputData(const GeShape &final_shape, for (int64_t j = 0; j < n_; j++) { // input range already check before. Range is [0,n_). 
const uint8_t *in_data = input[j]->GetData().data(); - auto ret = memcpy_s(buf.get() + dst_offset, output_size * data_size - dst_offset, in_data + src_offset, - data_size * unit); + auto ret = + memcpy_s(buf.get() + dst_offset, output_size * data_size - dst_offset, in_data + src_offset, data_size * unit); if (ret != EOK) { GELOGW("Memory copy failed."); return NOT_CHANGED; diff --git a/src/ge/graph/passes/folding_kernel/reduce_prod_kernel.cc b/src/ge/graph/passes/folding_kernel/reduce_prod_kernel.cc index 11fc3107..e05aa2e7 100644 --- a/src/ge/graph/passes/folding_kernel/reduce_prod_kernel.cc +++ b/src/ge/graph/passes/folding_kernel/reduce_prod_kernel.cc @@ -148,7 +148,7 @@ Status ReduceProdKernel::DataCal(const std::vector &input, GE_IF_BOOL_EXEC(output_ptr->SetData(reinterpret_cast(buf.get()), static_cast(head_dim_ * end_dim_ * sizeof(int32_t))) != GRAPH_SUCCESS, - GELOGE(INTERNAL_ERROR, "set data failed"); + GELOGW("set data failed"); return INTERNAL_ERROR); output_ptr->MutableTensorDesc().SetDataType(data_dtype); } @@ -159,13 +159,13 @@ void ReduceProdKernel::ShapeCal(const ge::OpDescPtr &op_desc_ptr, const std::vec ge::GeTensorPtr output_ptr) { ConstGeTensorPtr data_tensor = input.at(kReduceProdDataIndex); ConstGeTensorPtr axis_tensor = input.at(kReduceProdAxisIndex); - auto axis_data = axis_tensor->GetData().GetData(); + vector data_dims = data_tensor->GetTensorDesc().GetShape().GetDims(); + int32_t data_dim_size = static_cast(data_dims.size()); + const uint8_t *axis_data = axis_tensor->GetData().GetData(); if (axis_data == nullptr) { - GELOGE(FAILED, "Data of axis tensor is nullptr."); + GE_LOGE("param axis_data must not be null."); return; } - vector data_dims = data_tensor->GetTensorDesc().GetShape().GetDims(); - int32_t data_dim_size = static_cast(data_dims.size()); int32_t axis = *(const_cast(reinterpret_cast(axis_data))); bool keep_dims = false; if (!AttrUtils::GetBool(op_desc_ptr, "keep_dims", keep_dims)) { @@ -228,7 +228,7 @@ Status ReduceProdKernel::ComputeNoAxis(const ge::OpDescPtr &op_desc_ptr, const s } buf[0] = tmp_x; GE_IF_BOOL_EXEC(output_ptr->SetData(reinterpret_cast(buf.get()), sizeof(int32_t)) != GRAPH_SUCCESS, - GELOGE(INTERNAL_ERROR, "set data failed"); + GELOGW("set data failed"); return INTERNAL_ERROR); output_ptr->MutableTensorDesc().SetDataType(data_type); output_ptr->MutableTensorDesc().SetShape(GeShape()); diff --git a/src/ge/graph/passes/folding_kernel/reduce_prod_kernel.h b/src/ge/graph/passes/folding_kernel/reduce_prod_kernel.h index 326dd2f5..4b858b4a 100644 --- a/src/ge/graph/passes/folding_kernel/reduce_prod_kernel.h +++ b/src/ge/graph/passes/folding_kernel/reduce_prod_kernel.h @@ -42,4 +42,4 @@ class ReduceProdKernel : public Kernel { }; } // namespace ge -#endif // GE_GRAPH_PASSES_FOLDING_KERNEL_REDUCE_PROD_KERNEL_H_ +#endif // GE_GRAPH_PASSES_FOLDING_KERNEL_REDUCE_PROD_KERNEL_H_ \ No newline at end of file diff --git a/src/ge/graph/passes/folding_kernel/reshape_kernel.h b/src/ge/graph/passes/folding_kernel/reshape_kernel.h index 0e9692d9..c0100e51 100644 --- a/src/ge/graph/passes/folding_kernel/reshape_kernel.h +++ b/src/ge/graph/passes/folding_kernel/reshape_kernel.h @@ -22,10 +22,10 @@ #include "inc/kernel.h" namespace ge { -/// -/// @ingroup ge -/// @brief Reshape optimization operator processing -/// +/** + * @ingroup ge + * @brief Reshape optimization operator processing + */ class ReshapeKernel : public Kernel { public: Status Compute(const NodePtr &node_ptr) override; diff --git a/src/ge/graph/passes/folding_kernel/squeeze_kernel.h 
b/src/ge/graph/passes/folding_kernel/squeeze_kernel.h index 8212bac6..89fdf99b 100644 --- a/src/ge/graph/passes/folding_kernel/squeeze_kernel.h +++ b/src/ge/graph/passes/folding_kernel/squeeze_kernel.h @@ -22,10 +22,11 @@ #include "inc/kernel.h" namespace ge { -/// -/// @ingroup ge -/// @brief Squeeze optimization operator processing -/// +/** + * @ingroup ge + * @brief Squeeze optimization operator processing + * @author + */ class SqueezeKernel : public Kernel { public: // DimensionAdjustPass diff --git a/src/ge/graph/passes/folding_kernel/ssd_prior_box_kernel.cc b/src/ge/graph/passes/folding_kernel/ssd_prior_box_kernel.cc index 490a8950..d6cfdcca 100644 --- a/src/ge/graph/passes/folding_kernel/ssd_prior_box_kernel.cc +++ b/src/ge/graph/passes/folding_kernel/ssd_prior_box_kernel.cc @@ -25,8 +25,8 @@ #include "common/math/math_util.h" #include "common/math_util.h" #include "common/types.h" -#include "graph/debug/ge_attr_define.h" #include "framework/common/util.h" +#include "graph/debug/ge_attr_define.h" #include "graph/passes/pass_utils.h" #include "graph/utils/attr_utils.h" #include "inc/kernel_factory.h" @@ -131,7 +131,7 @@ Status SsdPriorboxKernel::GetPriorListParam(const OpDescPtr &op_desc, vector &v_output) override; private: - /// - /// Get specific op_desc attr value - /// @param [in] op_desc: Input op_desc - /// @param [in/out] img_width: img_width attr_value - /// @param [in/out] img_height: img_height attr_value - /// @param [in/out] step_h: step_h attr_value - /// @param [in/out] step_w: step_w attr_value - /// @param [in/out] layer_width: layer_width attr_value - /// @param [in/out] layer_height: layer_height attr_value - /// @return SUCCESS: node get attr value success - /// @return OTHERS: Execution failed - /// @author - /// + /** + * Get specific op_desc attr value + * @param [in] op_desc: Input op_desc + * @param [in/out] img_width: img_width attr_value + * @param [in/out] img_height: img_height attr_value + * @param [in/out] step_h: step_h attr_value + * @param [in/out] step_w: step_w attr_value + * @param [in/out] layer_width: layer_width attr_value + * @param [in/out] layer_height: layer_height attr_value + * @return SUCCESS: node get attr value success + * @return OTHERS: Execution failed + * @author + */ Status GetPriorSizeParam(const OpDescPtr &op_desc, int &img_width, int &img_height, float &step_w, float &step_h, int &layer_width, int &layer_height); - /// - /// Get specific op_desc size attr value,min_size_num etc. - /// @param [in] op_desc: Input op_desc - /// @param [in/out] offset: offset attr_value - /// @param [in/out] clip: clip attr_value - /// @return SUCCESS: get attr success - /// @return OTHERS: Execution failed - /// @author - /// + /** + * Get specific op_desc size attr value,min_size_num etc. + * @param [in] op_desc: Input op_desc + * @param [in/out] offset: offset attr_value + * @param [in/out] clip: clip attr_value + * @return SUCCESS: get attr success + * @return OTHERS: Execution failed + * @author + */ Status GetPriorOtherParam(const OpDescPtr &op_desc, float &offset, bool &clip); - /// - /// Get specific op_desc list attr value,min_size_list etc. 
- /// @param [in] op_desc: Input op_desc - /// @param [in/out] min_size_list: min_size_list attr_value - /// @param [in/out] max_size_list: max_size_list attr_value - /// @param [in/out] aspect_ratio_list: aspect_ratio_list attr_value - /// @param [in/out] variance_list: variance_list attr_value - /// @param [in/out] clip: clip attr_value - /// @return SUCCESS: get list attr success - /// @return OTHERS: Execution failed - /// @author - /// + /** + * Get specific op_desc list attr value,min_size_list etc. + * @param [in] op_desc: Input op_desc + * @param [in/out] min_size_list: min_size_list attr_value + * @param [in/out] max_size_list: max_size_list attr_value + * @param [in/out] aspect_ratio_list: aspect_ratio_list attr_value + * @param [in/out] variance_list: variance_list attr_value + * @param [in/out] clip: clip attr_value + * @return SUCCESS: get list attr success + * @return OTHERS: Execution failed + * @author + */ Status GetPriorListParam(const OpDescPtr &op_desc, vector &min_size_list, vector &max_size_list, vector &aspect_ratio_list, vector &variance_list); - /// - /// set variance param to output_data. - /// @param [in] variance: variance list - /// @param [in] dim: output_data second channel offset - /// @param [in] layer_height: layer_height - /// @param [in] num_priors: num_priors - /// @param [in/out] output_data: output_data - /// @return SUCCESS: set variance success - /// @return OTHERS: Execution failed - /// @author - /// + /** + * set variance param to output_data. + * @param [in] variance: variance list + * @param [in] dim: output_data second channel offset + * @param [in] layer_height: layer_height + * @param [in] num_priors: num_priors + * @param [in/out] output_data: output_data + * @return SUCCESS: set variance success + * @return OTHERS: Execution failed + * @author + */ Status SetVariance(const vector &variance, const int dim, const int32_t layer_height, const int32_t layer_width, const int num_priors, float *output_data); - /// - /// get num priors and dim size. - /// @param [in] aspect_ratios_size: aspect_ratio_list size - /// @param [in] min_sizes_size: min_size_list size - /// @param [in] max_sizes_size: max_size_list size - /// @param [in] layer_width: layer_width - /// @param [in] layer_height: layer_height - /// @param [in/out] num_priors: num_priors - /// @param [in/out] dim_size: dim_size - /// @return SUCCESS: set variance success - /// @return OTHERS: Execution failed - /// @author - /// + /** + * get num priors and dim size. 
+ * @param [in] aspect_ratios_size: aspect_ratio_list size + * @param [in] min_sizes_size: min_size_list size + * @param [in] max_sizes_size: max_size_list size + * @param [in] layer_width: layer_width + * @param [in] layer_height: layer_height + * @param [in/out] num_priors: num_priors + * @param [in/out] dim_size: dim_size + * @return SUCCESS: set variance success + * @return OTHERS: Execution failed + * @author + */ Status GetNumPriorAndDimSize(uint aspect_ratios_size, uint min_sizes_size, uint max_sizes_size, int layer_width, int layer_height, int &num_priors, int &dim_size) const; void DataCalulate(float x, float y, float box_x, float box_y, int img_x, int img_y, vector &result); diff --git a/src/ge/graph/passes/folding_pass.cc b/src/ge/graph/passes/folding_pass.cc index e37c8336..9a64fc4f 100644 --- a/src/ge/graph/passes/folding_pass.cc +++ b/src/ge/graph/passes/folding_pass.cc @@ -18,16 +18,15 @@ #include #include -#include #include #include #include "framework/common/debug/ge_log.h" -#include "graph/debug/ge_attr_define.h" #include "graph/utils/graph_utils.h" #include "graph/utils/node_utils.h" #include "inc/kernel.h" #include "inc/kernel_factory.h" +#include "graph/debug/ge_attr_define.h" namespace ge { namespace folding_pass { @@ -172,7 +171,7 @@ Status FoldingPass::DealWithInNodes(NodePtr &node) { node->GetName().c_str()); auto identity_name = node->GetName() + "_ctrl_identity_" + std::to_string(in_data_anchor->GetIdx()); auto identity = - AddIdentityNodeToGraph(identity_name, node->GetOpDesc()->GetInputDesc(in_data_anchor->GetIdx()), graph); + AddIdentityNodeToGraph(identity_name, node->GetOpDesc()->GetInputDesc(in_data_anchor->GetIdx()), graph); if (identity == nullptr) { GELOGE(INTERNAL_ERROR, "Failed to add identity node to graph."); return INTERNAL_ERROR; diff --git a/src/ge/graph/passes/get_original_format_pass.cc b/src/ge/graph/passes/get_original_format_pass.cc index e0f382e8..d6f795fb 100644 --- a/src/ge/graph/passes/get_original_format_pass.cc +++ b/src/ge/graph/passes/get_original_format_pass.cc @@ -19,12 +19,12 @@ #include #include "common/debug/log.h" +#include "framework/common/debug/ge_log.h" #include "common/types.h" #include "common/util.h" -#include "framework/common/debug/ge_log.h" -#include "framework/omg/omg_inner_types.h" #include "graph/utils/attr_utils.h" #include "graph/utils/op_desc_utils.h" +#include "framework/omg/omg_inner_types.h" using domi::DOMI_TENSOR_NCHW; using domi::DOMI_TENSOR_NHWC; @@ -96,9 +96,9 @@ Status GetOriginalFormatPass::SetOriginalFormat(const ge::ComputeGraphPtr &graph OpDescPtr tmpSecondOpPtr = bias_node_ptr->GetInDataNodes().at(1)->GetOpDesc(); GE_CHECK_NOTNULL(tmpSecondOpPtr); GE_IF_BOOL_EXEC( - !AttrUtils::GetInt(tmp_first_op_ptr, ATTR_NAME_FORMAT, first_input_format), continue_flag = true; break); + !AttrUtils::GetInt(tmp_first_op_ptr, ATTR_NAME_FORMAT, first_input_format), continue_flag = true; break); GE_IF_BOOL_EXEC( - !AttrUtils::GetInt(tmpSecondOpPtr, ATTR_NAME_FORMAT, second_input_format), continue_flag = true; break); + !AttrUtils::GetInt(tmpSecondOpPtr, ATTR_NAME_FORMAT, second_input_format), continue_flag = true; break); if (first_input_format != second_input_format) { GELOGW("biasadd node is followed two nodes with different format, get original format failed"); diff --git a/src/ge/graph/passes/hccl_memcpy_pass.cc b/src/ge/graph/passes/hccl_memcpy_pass.cc index 13846c4e..4588c1c2 100644 --- a/src/ge/graph/passes/hccl_memcpy_pass.cc +++ b/src/ge/graph/passes/hccl_memcpy_pass.cc @@ -19,9 +19,9 @@ #include #include 
"common/debug/log.h" -#include "common/ge/ge_util.h" -#include "common/ge_inner_error_codes.h" #include "framework/common/debug/ge_log.h" +#include "common/ge_inner_error_codes.h" +#include "common/ge/ge_util.h" #include "framework/common/types.h" #include "graph/utils/graph_utils.h" diff --git a/src/ge/graph/passes/isolated_op_remove_pass.cc b/src/ge/graph/passes/isolated_op_remove_pass.cc index 746b5f6b..152104eb 100644 --- a/src/ge/graph/passes/isolated_op_remove_pass.cc +++ b/src/ge/graph/passes/isolated_op_remove_pass.cc @@ -20,7 +20,6 @@ #include "common/types.h" #include "common/util.h" - namespace ge { Status IsolatedOpRemovePass::Run(ge::ComputeGraphPtr graph) { GE_CHECK_NOTNULL(graph); diff --git a/src/ge/graph/passes/iterator_op_pass.cc b/src/ge/graph/passes/iterator_op_pass.cc index 95ffcd90..35bfe496 100644 --- a/src/ge/graph/passes/iterator_op_pass.cc +++ b/src/ge/graph/passes/iterator_op_pass.cc @@ -27,9 +27,8 @@ #include "graph/common/omg_util.h" #include "graph/graph.h" #include "graph/node.h" -#include "graph/passes/pass_utils.h" #include "graph/utils/graph_utils.h" - +#include "graph/passes/pass_utils.h" namespace ge { const char *const kGetNext = "GetNext"; @@ -105,18 +104,19 @@ ge::NodePtr IteratorOpPass::InsertMemcpyAsyncNode(const ge::NodePtr &pre_node, c } // Control out OutControlAnchorPtr out_ctrl_anchor = pre_node->GetOutControlAnchor(); - GE_IF_BOOL_EXEC(out_ctrl_anchor != nullptr, for (auto &peer_in_ctrl_anchor - : out_ctrl_anchor->GetPeerInControlAnchors()) { - ge::graphStatus status = GraphUtils::RemoveEdge(out_ctrl_anchor, peer_in_ctrl_anchor); - GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, "Remove edge failed, dst node: %s.", - peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); - status = GraphUtils::AddEdge(memcpy_async_node->GetOutControlAnchor(), peer_in_ctrl_anchor); - GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, - "Graph add memcpyAsync op out ctrl edge fail, dst node: %s.", - peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); - GELOGI("Graph add memcpyAsync op out ctrl edge, dst node: %s.", - peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); - }); + GE_IF_BOOL_EXEC( + out_ctrl_anchor != nullptr, for (auto &peer_in_ctrl_anchor + : out_ctrl_anchor->GetPeerInControlAnchors()) { + ge::graphStatus status = GraphUtils::RemoveEdge(out_ctrl_anchor, peer_in_ctrl_anchor); + GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, "Remove edge failed, dst node: %s.", + peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); + status = GraphUtils::AddEdge(memcpy_async_node->GetOutControlAnchor(), peer_in_ctrl_anchor); + GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, + "Graph add memcpyAsync op out ctrl edge fail, dst node: %s.", + peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); + GELOGI("Graph add memcpyAsync op out ctrl edge, dst node: %s.", + peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); + }); GELOGI("Insert memcpyAsync op success."); return memcpy_async_node; @@ -142,9 +142,9 @@ ge::OpDescPtr IteratorOpPass::CreateMemcpyAsyncOp(const ge::NodePtr &pre_node) { ge::OpDescPtr pre_node_op_desc = pre_node->GetOpDesc(); GE_CHK_BOOL_EXEC(pre_node_op_desc != nullptr, return nullptr, "OpDesc of pre_node is invalid."); - auto out_size = static_cast(pre_node_op_desc->GetOutputsSize()); - GELOGI("Create memcpyAsync op, pre_node out_size: %u.", out_size); - for (uint32_t i = 0; i < out_size; i++) { + size_t out_size = pre_node_op_desc->GetOutputsSize(); + GELOGI("Create memcpyAsync op, pre_node 
out_size: %zu.", out_size); + for (size_t i = 0; i < out_size; i++) { GE_CHK_BOOL_EXEC(op_desc->AddInputDesc(pre_node_op_desc->GetOutputDesc(i)) == GRAPH_SUCCESS, return nullptr, "Create memcpyAsync op:add input desc fail."); GE_CHK_BOOL_EXEC(op_desc->AddOutputDesc(pre_node_op_desc->GetOutputDesc(i)) == GRAPH_SUCCESS, return nullptr, diff --git a/src/ge/graph/passes/link_gen_mask_nodes_pass.cc b/src/ge/graph/passes/link_gen_mask_nodes_pass.cc index d01fe231..33e22035 100644 --- a/src/ge/graph/passes/link_gen_mask_nodes_pass.cc +++ b/src/ge/graph/passes/link_gen_mask_nodes_pass.cc @@ -70,8 +70,8 @@ Status LinkGenMaskNodesPass::Run(ComputeGraphPtr graph) { auto dest_anchor = dest_node->GetInControlAnchor(); GE_CHECK_NOTNULL(dest_anchor); - graphStatus ret = src_anchor->LinkTo(dest_anchor); - if (ret != GRAPH_SUCCESS) { + graphStatus status = src_anchor->LinkTo(dest_anchor); + if (status != GRAPH_SUCCESS) { GELOGE(FAILED, "Link from %s to %s failed.", src_node->GetName().c_str(), dest_node->GetName().c_str()); return FAILED; } diff --git a/src/ge/graph/passes/link_gen_mask_nodes_pass.h b/src/ge/graph/passes/link_gen_mask_nodes_pass.h index 57343137..4f24600e 100644 --- a/src/ge/graph/passes/link_gen_mask_nodes_pass.h +++ b/src/ge/graph/passes/link_gen_mask_nodes_pass.h @@ -17,10 +17,6 @@ #ifndef GE_GRAPH_PASSES_LINK_GEN_MASK_NODES_PASS_H_ #define GE_GRAPH_PASSES_LINK_GEN_MASK_NODES_PASS_H_ -#include -#include -#include - #include "graph/graph.h" #include "inc/graph_pass.h" diff --git a/src/ge/graph/passes/multi_batch_pass.h b/src/ge/graph/passes/multi_batch_pass.h index 6e3f5e46..fd4e6b57 100644 --- a/src/ge/graph/passes/multi_batch_pass.h +++ b/src/ge/graph/passes/multi_batch_pass.h @@ -47,4 +47,4 @@ class MultiBatchPass : public GraphPass { std::vector> batch_head_nodes_; }; } // namespace ge -#endif // GE_GRAPH_PASSES_MULTI_BATCH_PASS_H_ +#endif // GE_GRAPH_PASSES_MULTI_BATCH_PASS_H_ \ No newline at end of file diff --git a/src/ge/graph/passes/net_output_pass.cc b/src/ge/graph/passes/net_output_pass.cc index f3d4aa54..63db7ce3 100644 --- a/src/ge/graph/passes/net_output_pass.cc +++ b/src/ge/graph/passes/net_output_pass.cc @@ -22,14 +22,13 @@ #include #include -#include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" -#include "graph/debug/ge_attr_define.h" +#include "common/ge/ge_util.h" #include "graph/passes/pass_utils.h" #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" - +#include "graph/debug/ge_attr_define.h" namespace ge { Status NetOutputPass::GetRetvalOutputInfo(const ge::NodePtr &node, @@ -144,9 +143,9 @@ void NetOutputPass::AddInOutForNetOutputOp(const ge::ComputeGraphPtr &graph, con return; } ge::GeTensorDesc out_desc = src_node->GetOpDesc()->GetOutputDesc(src_index); - GE_IF_BOOL_EXEC(net_output_desc->AddInputDesc(out_desc) != SUCCESS, GELOGW("add input desc failed"); return); + GE_IF_BOOL_EXEC(net_output_desc->AddInputDesc(out_desc) != SUCCESS, GELOGW("add input desc failed"); return ); TensorUtils::SetOutputTensor(out_desc, true); - GE_IF_BOOL_EXEC(net_output_desc->AddOutputDesc(out_desc) != SUCCESS, GELOGW("add output desc failed"); return); + GE_IF_BOOL_EXEC(net_output_desc->AddOutputDesc(out_desc) != SUCCESS, GELOGW("add output desc failed"); return ); } Status NetOutputPass::RemoveUnusedNode(const ge::ComputeGraphPtr &graph) { @@ -260,7 +259,7 @@ Status NetOutputPass::AddEdgesForNetOutput(const ge::ComputeGraphPtr &graph, con NodePtr src_node = item.first; 
GE_CHECK_NOTNULL(src_node); graphStatus status = - GraphUtils::AddEdge(src_node->GetOutDataAnchor(item.second), net_out_node->GetInDataAnchor(net_input_index)); + GraphUtils::AddEdge(src_node->GetOutDataAnchor(item.second), net_out_node->GetInDataAnchor(net_input_index)); if (status != GRAPH_SUCCESS) { GELOGE(INTERNAL_ERROR, "AddEdge failed, src name:%s, src index:%d, dst index:%d.", src_node->GetName().c_str(), item.second, net_input_index); diff --git a/src/ge/graph/passes/no_reshape_op_remove_pass.cc b/src/ge/graph/passes/no_reshape_op_remove_pass.cc index cf214920..59f4eae3 100644 --- a/src/ge/graph/passes/no_reshape_op_remove_pass.cc +++ b/src/ge/graph/passes/no_reshape_op_remove_pass.cc @@ -19,16 +19,16 @@ #include #include +#include "framework/common/debug/ge_log.h" #include "common/op/attr_value_util.h" #include "common/op/ge_op_utils.h" #include "common/types.h" #include "common/util.h" -#include "framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" -#include "framework/omg/omg_inner_types.h" #include "graph/utils/graph_utils.h" #include "graph/utils/op_desc_utils.h" #include "graph/utils/tensor_utils.h" +#include "framework/omg/omg_inner_types.h" namespace ge { namespace { @@ -45,6 +45,8 @@ Status NoReshapeOpRemovePass::Run(ge::NodePtr &node) { return CheckNodeShapeAndForamt(node); } else if (op_desc_ptr->GetType() == RESHAPE) { if (op_desc_ptr->GetName() == kReshapeName) { + NodePtr out_data_node; + NodePtr reshape_out_node; std::vector types; std::list path; path.push_back(node); @@ -53,8 +55,8 @@ Status NoReshapeOpRemovePass::Run(ge::NodePtr &node) { types.emplace_back(CORRELATION); // check reshape out data node fit specific type bool reshape_correlation_flag = true; - for (const auto &type : types) { - if (!CheckOutDataNodesType(type, path)) { + for (size_t i = 0; i < types.size(); i++) { + if (!CheckOutDataNodesType(types[i], path)) { reshape_correlation_flag = false; break; } @@ -173,8 +175,8 @@ vector NoReshapeOpRemovePass::CheckLinkedReshape(ge::NodePtr &node) continue; } Node::Vistor out_data_nodes = src_node->GetOutDataNodes(); - if ((out_data_nodes.size() == 1) && (out_data_nodes.at(0)->GetOpDesc() != nullptr) - && (out_data_nodes.at(0)->GetOpDesc()->GetType() == RESHAPE)) { + if ((out_data_nodes.size() == 1) && (out_data_nodes.at(0)->GetOpDesc() != nullptr) && + (out_data_nodes.at(0)->GetOpDesc()->GetType() == RESHAPE)) { NodePtr dst_node = out_data_nodes.at(0); node_path.push_back(dst_node); GeTensorDesc dst_output_desc = dst_node->GetOpDesc()->GetOutputDesc(0); diff --git a/src/ge/graph/passes/pass_manager.cc b/src/ge/graph/passes/pass_manager.cc index a8919a55..6cfcfe6b 100644 --- a/src/ge/graph/passes/pass_manager.cc +++ b/src/ge/graph/passes/pass_manager.cc @@ -19,9 +19,10 @@ #include "common/types.h" #include "common/util.h" #include "graph/utils/node_utils.h" +#include "omg/omg_inner_types.h" namespace ge { -const vector& PassManager::GraphPasses() const { return graph_passes_; } +const vector &PassManager::GraphPasses() const { return graph_passes_; } Status PassManager::AddPass(GraphPass *pass) { GE_CHECK_NOTNULL(pass); diff --git a/src/ge/graph/passes/pass_utils.cc b/src/ge/graph/passes/pass_utils.cc index 58679614..9b3f6b5f 100644 --- a/src/ge/graph/passes/pass_utils.cc +++ b/src/ge/graph/passes/pass_utils.cc @@ -22,12 +22,11 @@ #include #include -#include "cce/dnn_base_def.hpp" -#include "common/ge/ge_util.h" +#include "framework/common/debug/ge_log.h" #include "common/ge_inner_error_codes.h" +#include "common/ge/ge_util.h" 
#include "common/op/ge_op_utils.h" #include "common/types.h" -#include "framework/common/debug/ge_log.h" #include "graph/common/omg_util.h" #include "graph/debug/ge_attr_define.h" #include "graph/ge_tensor.h" @@ -40,7 +39,7 @@ namespace ge { namespace { const uint32_t kShapeDimSize = 1; -const uint32_t kDimSizeTwo = 2; +const uint32_t DIM_SIZE_TWO = 2; } // namespace Status PassUtils::ConstructTensorDescWithData(const GeTensorDesc &out_desc, std::vector &data, @@ -89,14 +88,13 @@ Status PassUtils::ConstructTensorDescWithData(const GeTensorDesc &out_desc, std: template Status PassUtils::ConstructTensorDescWithData(const GeTensorDesc &out_desc, T *buf, uint32_t len, std::vector &v_output, const bool scalar_output) { - bool empty_shape = ((len == 1) && scalar_output) || (len == 0); // construct TensorDesc - GeShape out_shape = (empty_shape ? GeShape() : GeShape({len})); + GeShape out_shape = (scalar_output ? GeShape() : GeShape({len})); GeTensorDesc output_tensor_desc(out_desc); output_tensor_desc.SetShape(out_shape); GeTensorPtr output_tensor_ptr = - MakeShared(output_tensor_desc, reinterpret_cast(buf), sizeof(T) * len); + MakeShared(output_tensor_desc, reinterpret_cast(buf), sizeof(T) * len); if (output_tensor_ptr == nullptr) { GELOGE(MEMALLOC_FAILED, "Make shared failed"); return MEMALLOC_FAILED; diff --git a/src/ge/graph/passes/print_op_pass.h b/src/ge/graph/passes/print_op_pass.h index e9e6d67e..bcb62e12 100644 --- a/src/ge/graph/passes/print_op_pass.h +++ b/src/ge/graph/passes/print_op_pass.h @@ -19,6 +19,7 @@ #include "framework/common/debug/ge_log.h" #include "framework/common/types.h" +#include "graph/debug/ge_attr_define.h" #include "graph/common/omg_util.h" #include "graph/debug/ge_attr_define.h" #include "graph/graph.h" diff --git a/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc b/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc index 9074d35e..59e32e10 100644 --- a/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc +++ b/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc @@ -168,6 +168,7 @@ graphStatus SameTransdataBreadthFusionPass::ReLinkDataOutput2PreNode(const NodeP const NodePtr &relink_node) { GE_CHECK_NOTNULL(pre_out_anchor); GE_CHECK_NOTNULL(transdata_node); + auto transdata_peer_out_control_anchor = pre_out_anchor->GetOwnerNode()->GetOutControlAnchor(); for (auto &out_anchor : transdata_node->GetAllOutDataAnchors()) { // relink data edge for (auto &transdata_peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { @@ -195,7 +196,7 @@ graphStatus SameTransdataBreadthFusionPass::ReLinkDataOutput2PreNode(const NodeP } graphStatus SameTransdataBreadthFusionPass::ReLinkOutDataPeerInControlNodes2PreNode( - const NodePtr &transdata_node, const OutDataAnchorPtr &pre_out_anchor) { + const NodePtr &transdata_node, const OutDataAnchorPtr &pre_out_anchor) { GE_CHECK_NOTNULL(pre_out_anchor); GE_CHECK_NOTNULL(transdata_node); auto transdata_peer_out_control_anchor = pre_out_anchor->GetOwnerNode()->GetOutControlAnchor(); @@ -250,8 +251,8 @@ graphStatus SameTransdataBreadthFusionPass::ReLinkTransdataOutput2PreNode(const } graphStatus SameTransdataBreadthFusionPass::ReLinkOutControlPeerInControlAnchors( - const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, - const OutControlAnchorPtr &transdata_peer_out_control_anchor) { + const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, + const OutControlAnchorPtr &transdata_peer_out_control_anchor) { GE_CHECK_NOTNULL(transdata_node_keep); 
GE_CHECK_NOTNULL(pre_out_anchor); auto out_control_anchor = transdata_node_keep->GetOutControlAnchor(); @@ -287,8 +288,8 @@ graphStatus SameTransdataBreadthFusionPass::ReLinkOutControlPeerInControlAnchors } graphStatus SameTransdataBreadthFusionPass::ReLinkOutControlPeerInDataAnchors( - const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, - const OutControlAnchorPtr &transdata_peer_out_control_anchor) { + const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, + const OutControlAnchorPtr &transdata_peer_out_control_anchor) { GE_CHECK_NOTNULL(transdata_node_keep); GE_CHECK_NOTNULL(pre_out_anchor); auto out_control_anchor = transdata_node_keep->GetOutControlAnchor(); @@ -326,8 +327,8 @@ graphStatus SameTransdataBreadthFusionPass::ReLinkOutControlPeerInDataAnchors( } graphStatus SameTransdataBreadthFusionPass::ReLinkTransdataControlOutput2PreNode( - const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, - const OutControlAnchorPtr &transdata_peer_out_control_anchor) { + const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, + const OutControlAnchorPtr &transdata_peer_out_control_anchor) { if (ReLinkOutControlPeerInControlAnchors(transdata_node_keep, pre_out_anchor, transdata_peer_out_control_anchor) != GRAPH_SUCCESS) { return GRAPH_FAILED; @@ -564,8 +565,8 @@ void SameTransdataBreadthFusionPass::CopyTensorDesc(const ConstGeTensorDescPtr & } graphStatus SameTransdataBreadthFusionPass::LinkNewCastNode2RemainTransdata( - const ComputeGraphPtr &graph, const vector &same_transdata_nodes, const OutDataAnchorPtr &transdata_out_anchor, - const NodePtr &transdata_node_keep) { + const ComputeGraphPtr &graph, const vector &same_transdata_nodes, const OutDataAnchorPtr &transdata_out_anchor, + const NodePtr &transdata_node_keep) { for (size_t i = 1; i < same_transdata_nodes.size(); ++i) { int anchors_index = same_transdata_nodes[i]; bool reuse_nodes = AllNodeBeforeTransdataHasOneDataOut(anchors_index); @@ -703,9 +704,8 @@ graphStatus SameTransdataBreadthFusionPass::AddCastNode(const ComputeGraphPtr &g } graphStatus SameTransdataBreadthFusionPass::GetSubGraphsBetweenNormalAndTransdataNode( - OutDataAnchorPtr &out_anchor, - std::vector>> &sub_graphs_out, - std::vector> &nodes_list) { + OutDataAnchorPtr &out_anchor, std::vector>> &sub_graphs_out, + std::vector> &nodes_list) { graphStatus ret = GRAPH_SUCCESS; if (out_anchor == nullptr) { GELOGE(GRAPH_FAILED, "out data anchor is null!This should not happen!"); diff --git a/src/ge/graph/passes/same_transdata_breadth_fusion_pass.h b/src/ge/graph/passes/same_transdata_breadth_fusion_pass.h index be745056..5d8d09b4 100644 --- a/src/ge/graph/passes/same_transdata_breadth_fusion_pass.h +++ b/src/ge/graph/passes/same_transdata_breadth_fusion_pass.h @@ -35,17 +35,18 @@ class SameTransdataBreadthFusionPass : public GraphPass { private: graphStatus ExtractTransNode(const ComputeGraphPtr &graph); - graphStatus GetSubGraphsBetweenNormalAndTransdataNode(OutDataAnchorPtr &out_anchor, - std::vector>> &sub_graphs_out, - std::vector> &nodes_list); + graphStatus GetSubGraphsBetweenNormalAndTransdataNode( + OutDataAnchorPtr &out_anchor, + std::vector>> &sub_graphs_out, + std::vector> &nodes_list); void GetSubGraphNodesInfo(); + void EraseInvalidAnchorsPair(); + OpDescPtr GetCastOp(const GeTensorDesc &in_desc, const GeTensorDesc &out_desc); - graphStatus AddCastNode(const ComputeGraphPtr &graph, - int anchors_index, - OutDataAnchorPtr &pre_out_anchor, + graphStatus AddCastNode(const 
ComputeGraphPtr &graph, int anchors_index, OutDataAnchorPtr &pre_out_anchor, NodePtr &first_link_node); void GetSameTransdataNode(vector &same_transdata_nodes); @@ -53,8 +54,10 @@ class SameTransdataBreadthFusionPass : public GraphPass { graphStatus ReLinkTransdataOutput2PreNode(const NodePtr &transdata_node, const OutDataAnchorPtr &pre_out_anchor, const NodePtr &relink_node); - graphStatus LinkNewCastNode2RemainTransdata(const ComputeGraphPtr &graph, - const vector &same_transdata_nodes, + graphStatus RelinkTransdataControlEdge(ComputeGraphPtr graph, NodePtr transdata_node_remove, + NodePtr transdata_node_keep); + + graphStatus LinkNewCastNode2RemainTransdata(const ComputeGraphPtr &graph, const vector &same_transdata_nodes, const OutDataAnchorPtr &transdata_out_anchor, const NodePtr &transdata_node_keep); @@ -74,8 +77,7 @@ class SameTransdataBreadthFusionPass : public GraphPass { graphStatus RelinkInControlEdge(const NodePtr &node_src, const NodePtr &node_dst); - graphStatus ReLinkDataOutput2PreNode(const NodePtr &transdata_node, - const OutDataAnchorPtr &pre_out_anchor, + graphStatus ReLinkDataOutput2PreNode(const NodePtr &transdata_node, const OutDataAnchorPtr &pre_out_anchor, const NodePtr &relink_node); graphStatus ReLinkOutDataPeerInControlNodes2PreNode(const NodePtr &transdata_node, diff --git a/src/ge/graph/passes/shape_operate_op_remove_pass.cc b/src/ge/graph/passes/shape_operate_op_remove_pass.cc index 894f3e68..b701e065 100644 --- a/src/ge/graph/passes/shape_operate_op_remove_pass.cc +++ b/src/ge/graph/passes/shape_operate_op_remove_pass.cc @@ -15,8 +15,8 @@ */ #include "graph/passes/shape_operate_op_remove_pass.h" - #include "common/debug/log.h" +#include "common/types.h" #include "common/util.h" #include "graph/utils/attr_utils.h" diff --git a/src/ge/graph/passes/switch_op_pass.cc b/src/ge/graph/passes/switch_op_pass.cc index 50b1cf92..6aa61352 100644 --- a/src/ge/graph/passes/switch_op_pass.cc +++ b/src/ge/graph/passes/switch_op_pass.cc @@ -558,7 +558,7 @@ Status SwitchOpPass::MarkBranchs(OutDataAnchorPtr &peer_cond_anchor, NodePtr &st switch_list.emplace_back(false_node_list); switch_list.emplace_back(true_node_list); auto result = cond_node_map_.insert( - std::pair>>(peer_cond_anchor, switch_list)); + std::pair>>(peer_cond_anchor, switch_list)); GE_IF_BOOL_EXEC(!result.second, { GELOGE(INTERNAL_ERROR, "cond_node_map_ insert fail, node: %s", stream_switch->GetName().c_str()); return FAILED; @@ -632,7 +632,7 @@ Status SwitchOpPass::AddConstNode(ComputeGraphPtr &graph, NodePtr &stream_switch auto resize_value = (int32_t)value; GeTensorDesc data_desc = op_desc->GetInputDesc(1); GeTensorPtr const_value = - MakeShared(data_desc, reinterpret_cast(&resize_value), sizeof(int32_t)); + MakeShared(data_desc, reinterpret_cast(&resize_value), sizeof(int32_t)); if (const_value == nullptr) { GELOGE(FAILED, "Create tensor fail."); return FAILED; @@ -921,7 +921,7 @@ void SwitchOpPass::MarkCycleDependence(const std::unordered_mapGetName()}; @@ -1116,7 +1116,7 @@ void SwitchOpPass::RemoveControlEdges(NodePtr &node) { /// @return void /// void SwitchOpPass::ReplaceControlEdges(NodePtr &old_node, NodePtr &new_node) { - GE_IF_BOOL_EXEC(old_node == new_node, return); + GE_IF_BOOL_EXEC(old_node == new_node, return ); CopyControlEdges(old_node, new_node); RemoveControlEdges(old_node); } diff --git a/src/ge/graph/passes/transop_depth_fusion_pass.cc b/src/ge/graph/passes/transop_depth_fusion_pass.cc index 140efce4..ad98369f 100644 --- a/src/ge/graph/passes/transop_depth_fusion_pass.cc +++ 
b/src/ge/graph/passes/transop_depth_fusion_pass.cc @@ -17,16 +17,14 @@ #include "graph/passes/transop_depth_fusion_pass.h" #include - +#include "framework/common/debug/ge_log.h" #include "common/ge_inner_error_codes.h" #include "common/types.h" -#include "framework/common/debug/ge_log.h" #include "graph/compute_graph.h" #include "graph/ge_tensor.h" #include "graph/op_desc.h" #include "graph/utils/graph_utils.h" - namespace ge { graphStatus TransOpDepthFusionPass::Run(ComputeGraphPtr graph) { GELOGI("[TransOpDepthFusionPass]: optimize in depth begin..."); @@ -71,11 +69,11 @@ graphStatus TransOpDepthFusionPass::RecursiveInDepth(const InDataAnchorPtr &dst_ temp_depth++; if (temp_depth >= max_recursive_depth) { GELOGI( - "Caution: recursive depth is become %u." - "It's abnormally to have so many trans ops between two normal ops" - "Please check your graph in detail!" - "The search terminate here and continue to another branch.", - temp_depth); + "Caution: recursive depth has become %u. " + "It is abnormal to have so many trans ops between two normal ops. " + "Please check your graph in detail! " + "The search terminates here and continues on another branch.", + temp_depth); temp_depth--; return GRAPH_SUCCESS; } @@ -261,15 +259,15 @@ graphStatus TransOpDepthFusionPass::RelinkEdges(const OutDataAnchorPtr &new_out_ GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(old_out_anchor, in_data_anchor), "remove edge failed"); GE_CHK_STATUS_RET(GraphUtils::AddEdge(new_out_anchor, in_data_anchor), "add edge failed"); GELOGD( - "relink edges before remove node, remove data edge between node: %s, " - "type: %s and node: %s, type: %s.", - old_out_anchor->GetOwnerNode()->GetName().c_str(), old_out_anchor->GetOwnerNode()->GetType().c_str(), - in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetOwnerNode()->GetType().c_str()); + "relink edges before remove node, remove data edge between node: %s, " + "type: %s and node: %s, type: %s.", + old_out_anchor->GetOwnerNode()->GetName().c_str(), old_out_anchor->GetOwnerNode()->GetType().c_str(), + in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetOwnerNode()->GetType().c_str()); GELOGD( - "relink edges before remove node, add data edge between node: %s, " - "type: %s and node: %s, type: %s.", - new_out_anchor->GetOwnerNode()->GetName().c_str(), new_out_anchor->GetOwnerNode()->GetType().c_str(), - in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetOwnerNode()->GetType().c_str()); + "relink edges before remove node, add data edge between node: %s, " + "type: %s and node: %s, type: %s.", + new_out_anchor->GetOwnerNode()->GetName().c_str(), new_out_anchor->GetOwnerNode()->GetType().c_str(), + in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetOwnerNode()->GetType().c_str()); bool is_linked = false; auto dst_node = in_data_anchor->GetOwnerNode(); @@ -284,10 +282,10 @@ graphStatus TransOpDepthFusionPass::RelinkEdges(const OutDataAnchorPtr &new_out_ auto in_ctrl_anchor = dst_node->GetInControlAnchor(); GE_CHK_STATUS_RET(GraphUtils::AddEdge(out_ctrl_anchor, in_ctrl_anchor), "add edge failed"); GELOGD( - "relink edges before remove node, add control edge between node: %s," - " type: %s and node: %s, type: %s.", - src_node->GetName().c_str(), src_node->GetType().c_str(), dst_node->GetName().c_str(), - dst_node->GetType().c_str()); + "relink edges before remove node, add control edge between node: %s," + " type: %s and node: %s, type: %s.", + src_node->GetName().c_str(), src_node->GetType().c_str(), 
dst_node->GetName().c_str(), + dst_node->GetType().c_str()); } return GRAPH_SUCCESS; } diff --git a/src/ge/graph/passes/transop_without_reshape_fusion_pass.h b/src/ge/graph/passes/transop_without_reshape_fusion_pass.h index 4999c731..31cd23de 100644 --- a/src/ge/graph/passes/transop_without_reshape_fusion_pass.h +++ b/src/ge/graph/passes/transop_without_reshape_fusion_pass.h @@ -65,8 +65,8 @@ class TransOpWithoutReshapeFusionPass : public GraphPass { bool OpAccuracyAbilityCheck(const OpDescPtr &op_desc); graphStatus GetSubGraphsBetweenNormalNode( - const OutDataAnchorPtr &out_anchor, vector>> &sub_graphs_out, - vector> &nodes_list); + const OutDataAnchorPtr &out_anchor, vector>> &sub_graphs_out, + vector> &nodes_list); graphStatus GetSubGraphNodesInfo(); diff --git a/src/ge/graph/passes/transpose_transdata_pass.cc b/src/ge/graph/passes/transpose_transdata_pass.cc index 891c10ef..b36dda6a 100644 --- a/src/ge/graph/passes/transpose_transdata_pass.cc +++ b/src/ge/graph/passes/transpose_transdata_pass.cc @@ -19,7 +19,6 @@ #include #include #include - #include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" #include "graph/utils/type_utils.h" @@ -29,7 +28,7 @@ namespace { const char *const kAttrNameSrcFormat = "src_format"; -} +} // namespace namespace ge { Status TransposeTransDataPass::Run(NodePtr &node) { @@ -109,6 +108,7 @@ Status TransposeTransDataPass::RemoveTranspose(NodePtr &node) { // If delete Transpos/TransposeD, change its peer in ctrl anchor to its input node // If not delete, need do nothing auto origin_node_in = node->GetInDataNodes().at(0); + GE_CHECK_NOTNULL(node->GetOutControlAnchor()); for (auto &peer_anchor : node->GetOutControlAnchor()->GetPeerInControlAnchors()) { GE_CHECK_NOTNULL(origin_node_in); GE_CHECK_NOTNULL(origin_node_in->GetOutControlAnchor()); @@ -175,12 +175,13 @@ bool TransposeTransDataPass::FusionIfNeed(OpDescPtr &op_desc, OpDescPtr &transda // add attr to fused TransData, then will be rebuild string new_node_name = op_desc->GetName() + transdata_op_desc->GetName(); transdata_op_desc->SetName(new_node_name); - GE_IF_BOOL_EXEC(!AttrUtils::SetBool(transdata_op_desc, ATTR_NEED_COMPILE, true), - GELOGW("set ext attr failed"); return false); + GE_IF_BOOL_EXEC(!AttrUtils::SetBool(transdata_op_desc, ATTR_NEED_COMPILE, true), GELOGW("set ext attr failed"); + return false); string format_val = TypeUtils::FormatToSerialString(src_format); GE_IF_BOOL_EXEC(!AttrUtils::SetStr(transdata_op_desc, kAttrNameSrcFormat, format_val), - GELOGW("set kAttrNameSrcFormat failed"); return false); + GELOGW("set kAttrNameSrcFormat failed"); + return false); GELOGI("TransposeTransDataPass, fuse to be node %s.", transdata_op_desc->GetName().c_str()); return true; } @@ -195,11 +196,11 @@ void TransposeTransDataPass::CopyInputEdges(NodePtr &origin_node, NodePtr &new_n } OutDataAnchorPtr out_anchor = origin_node->GetInDataAnchor(0)->GetPeerOutAnchor(); new_in_data_anchor->UnlinkAll(); - GE_IF_BOOL_EXEC(new_in_data_anchor->LinkFrom(out_anchor) != GRAPH_SUCCESS, GELOGW("Link failed"); return); + GE_IF_BOOL_EXEC(new_in_data_anchor->LinkFrom(out_anchor) != GRAPH_SUCCESS, GELOGW("Link failed"); return ); // control anchor only link to control anchor - GE_IF_BOOL_EXEC(GraphUtils::CopyInCtrlEdges(origin_node, new_node) != GRAPH_SUCCESS, - GELOGW("Copy in ctrl edges failed"); return); + GE_IF_BOOL_EXEC( + GraphUtils::CopyInCtrlEdges(origin_node, new_node) != GRAPH_SUCCESS, GELOGW("Copy in ctrl edges failed"); return ); } bool 
TransposeTransDataPass::TransDataCheckAccuracySupported(const OpDescPtr &op_desc) { diff --git a/src/ge/graph/passes/unused_op_remove_pass.cc b/src/ge/graph/passes/unused_op_remove_pass.cc index e268bf5b..0b083d4e 100644 --- a/src/ge/graph/passes/unused_op_remove_pass.cc +++ b/src/ge/graph/passes/unused_op_remove_pass.cc @@ -22,11 +22,14 @@ #include #include "common/debug/log.h" +#include "common/op/ge_op_utils.h" #include "common/types.h" #include "common/util.h" #include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/op_desc_utils.h" +#include "inc/pass_manager.h" +#include "graph/passes/isolated_op_remove_pass.h" using domi::SUCCESS; @@ -120,11 +123,10 @@ bool UnusedOpRemovePass::IsExceptions(const NodePtr &node) { GE_CHK_BOOL_EXEC(op_def != nullptr, return false, "opdesc is nullptr"); // permute optimised in permute_pass.cpp if (op_def->GetType() == PERMUTE) { - GE_IF_BOOL_EXEC( - (node->GetInDataNodes().size() != 0 && - (node->GetInDataNodes().at(0) != nullptr && node->GetInDataNodes().at(0)->GetOpDesc() != nullptr && - node->GetInDataNodes().at(0)->GetOpDesc()->GetType() == ATTENTIONDECODER)), - return false); + GE_IF_BOOL_EXEC((node->GetInDataNodes().size() != 0 && + (node->GetInDataNodes().at(0) != nullptr && node->GetInDataNodes().at(0)->GetOpDesc() != nullptr && + node->GetInDataNodes().at(0)->GetOpDesc()->GetType() == ATTENTIONDECODER)), + return false); return true; } return false; diff --git a/src/ge/graph/passes/update_net_output_pass.cc b/src/ge/graph/passes/update_net_output_pass.cc index 10a3e202..bf887115 100644 --- a/src/ge/graph/passes/update_net_output_pass.cc +++ b/src/ge/graph/passes/update_net_output_pass.cc @@ -26,14 +26,12 @@ namespace ge { static std::map kOutputTypeStrToDataType = { - {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"INT8", ge::DT_INT8}, {"INT16", ge::DT_INT16}, - {"UINT16", ge::DT_UINT16}, {"UINT8", ge::DT_UINT8}, {"INT32", ge::DT_INT32}, {"INT64", ge::DT_INT64}, - {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}, + {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"INT8", ge::DT_INT8}, {"INT16", ge::DT_INT16}, + {"UINT16", ge::DT_UINT16}, {"UINT8", ge::DT_UINT8}, {"INT32", ge::DT_INT32}, {"INT64", ge::DT_INT64}, + {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}, }; -static void SetNetoutputDataType(OpDescPtr &op_desc, - uint32_t index, - ge::DataType output_data_type) { +static void SetNetoutputDataType(OpDescPtr &op_desc, uint32_t index, ge::DataType output_data_type) { // op_desc is judged not nullptr auto net_output_in_desc = op_desc->MutableInputDesc(index); if (net_output_in_desc != nullptr) { @@ -56,9 +54,7 @@ static Status SetNetoutputFormat(OpDescPtr op_desc, uint32_t index, ge::Format f auto net_output_in_desc = op_desc->MutableInputDesc(index); GE_CHECK_NOTNULL(net_output_in_desc); ge::Format old_format = net_output_in_desc->GetFormat(); - bool support = ((old_format == FORMAT_NC1HWC0) || - (old_format == FORMAT_NCHW) || - (old_format == FORMAT_NHWC)); + bool support = ((old_format == FORMAT_NC1HWC0) || (old_format == FORMAT_NCHW) || (old_format == FORMAT_NHWC)); if (!support) { GELOGE(INTERNAL_ERROR, "The node %s format [%s] is unsupported", op_desc->GetName().c_str(), TypeUtils::FormatToSerialString(old_format).c_str()); @@ -101,8 +97,7 @@ static Status SetNetoutputFormat(OpDescPtr op_desc, uint32_t index, ge::Format f net_output_out_desc->SetOriginShape(ge::GeShape(dst_shape_dims)); net_output_out_desc->SetFormat(format); 
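The update_net_output_pass.cc hunk above reflows kOutputTypeStrToDataType, the table that maps user-specified output-type strings ("FP32", "FP16", ...) to ge::DataType values. Below is a standalone sketch of that lookup, using a stand-in enum in place of the real ge::DataType from graph/types.h:

#include <cstdio>
#include <map>
#include <string>

// Stand-in for ge::DataType; the real enum lives in inc/external/graph/types.h.
enum DataType { DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UNDEFINED };

static const std::map<std::string, DataType> kOutputTypeStrToDataType = {
    {"FP32", DT_FLOAT}, {"FP16", DT_FLOAT16}, {"INT8", DT_INT8}};

// Returns DT_UNDEFINED when the user string is not a supported output type.
DataType ParseOutputType(const std::string &type_str) {
  auto it = kOutputTypeStrToDataType.find(type_str);
  return (it == kOutputTypeStrToDataType.end()) ? DT_UNDEFINED : it->second;
}

int main() {
  std::printf("FP16 -> %d\n", ParseOutputType("FP16"));  // prints 1
  return 0;
}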
net_output_out_desc->SetOriginFormat(format); - GELOGI("Update out desc, format:%s", - TypeUtils::FormatToSerialString(op_desc->GetOutputDesc(0).GetFormat()).c_str()); + GELOGI("Update out desc, format:%s", TypeUtils::FormatToSerialString(op_desc->GetOutputDesc(0).GetFormat()).c_str()); return SUCCESS; } diff --git a/src/ge/graph/passes/var_is_initialized_op_pass.cc b/src/ge/graph/passes/var_is_initialized_op_pass.cc index 4266cf1f..c88db80c 100644 --- a/src/ge/graph/passes/var_is_initialized_op_pass.cc +++ b/src/ge/graph/passes/var_is_initialized_op_pass.cc @@ -15,10 +15,8 @@ */ #include "graph/passes/var_is_initialized_op_pass.h" - #include #include - #include "framework/common/debug/ge_log.h" #include "common/ge/ge_util.h" #include "graph/anchor.h" @@ -50,12 +48,10 @@ Status VarIsInitializedOpPass::Run(NodePtr &node) { if (CheckSrcNode(node, inited) != SUCCESS) { return FAILED; } - GELOGI("The variable inited status %s on node %s", - inited ? "true" : "false", node->GetName().c_str()); + GELOGI("The variable inited status %s on node %s", inited ? "true" : "false", node->GetName().c_str()); ret = ChangeNodeToConstant(node, inited); - GELOGI("Change VarIsInitializedOp %s to be Constant %s end.", - node->GetName().c_str(), inited ? "true" : "false"); + GELOGI("Change VarIsInitializedOp %s to be Constant %s end.", node->GetName().c_str(), inited ? "true" : "false"); return ret; } @@ -63,9 +59,7 @@ Status VarIsInitializedOpPass::CheckSrcNode(const NodePtr &node, bool &inited) c GE_CHECK_NOTNULL(node); auto input_nodes = node->GetInDataNodes(); if (input_nodes.size() != kVarIsInitializedIOCnt) { - GELOGE(FAILED, - "[%s] Node input data nodes size [%zu] is not equal 1.", - node->GetName().c_str(), + GELOGE(FAILED, "[%s] Node input data nodes size [%zu] is not equal 1.", node->GetName().c_str(), input_nodes.size()); return FAILED; } @@ -131,8 +125,7 @@ Status VarIsInitializedOpPass::ProcessInAnchor(NodePtr &node, NodePtr &new_node) GE_CHECK_NOTNULL(new_node); auto in_anchors = node->GetAllInDataAnchors(); auto out_anchors = node->GetAllOutDataAnchors(); - if ((in_anchors.size() != kVarIsInitializedIOCnt) || - (out_anchors.size() != kVarIsInitializedIOCnt)) { + if ((in_anchors.size() != kVarIsInitializedIOCnt) || (out_anchors.size() != kVarIsInitializedIOCnt)) { GELOGE(FAILED, "[%s] Node input/output data anchors" " size [%lu][%lu] is not all equal 1.", @@ -151,8 +144,8 @@ Status VarIsInitializedOpPass::ProcessInAnchor(NodePtr &node, NodePtr &new_node) } auto src_node = peer_out_anchor->GetOwnerNode(); if (GraphUtils::AddEdge(src_node->GetOutControlAnchor(), new_node->GetInControlAnchor()) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to link control edges from var %s to new const %s", - src_node->GetName().c_str(), new_node->GetName().c_str()); + GELOGE(FAILED, "Failed to link control edges from var %s to new const %s", src_node->GetName().c_str(), + new_node->GetName().c_str()); return FAILED; } @@ -255,15 +248,15 @@ Status VarIsInitializedOpPass::UpdateInitedVars(const NodePtr &node) { if (inited_vars != nullptr) { GE_CHECK_NOTNULL(node->GetOpDesc()); nodes_to_inited_vars_[node->GetOpDesc()->GetId()] = inited_vars; - GELOGD("Inited vars on this graph when node %s, inited vars count %zu", - node->GetName().c_str(), inited_vars->size()); + GELOGD("Inited vars on this graph when node %s, inited vars count %zu", node->GetName().c_str(), + inited_vars->size()); } return SUCCESS; } std::set *VarIsInitializedOpPass::CreateInitedVars() { - std::unique_ptr> inited_vars_keeper(new(std::nothrow) 
std::set()); + std::unique_ptr> inited_vars_keeper(new (std::nothrow) std::set()); if (inited_vars_keeper == nullptr) { GELOGE(OUT_OF_MEMORY, "Failed to alloc set memory"); return nullptr; diff --git a/src/ge/graph/passes/variable_format_pass.cc b/src/ge/graph/passes/variable_format_pass.cc index ebd5f8b6..9b5e284e 100644 --- a/src/ge/graph/passes/variable_format_pass.cc +++ b/src/ge/graph/passes/variable_format_pass.cc @@ -66,9 +66,8 @@ bool VariableFormatPass::ConfirmUseOpAndIndexByAnchor(const ge::InDataAnchorPtr GELOGD("ConfirmUseOpAndIndex, var name %s, dst_type = %s, input index %d", dst_node->GetName().c_str(), dst_type.c_str(), input_index); - GE_IF_BOOL_EXEC( - confirm_ops.count(dst_type) > 0, - GE_IF_BOOL_EXEC(confirm_ops.at(dst_type).count(input_index) > 0, use_node = dst_node; return true);); + GE_IF_BOOL_EXEC(confirm_ops.count(dst_type) > 0, + GE_IF_BOOL_EXEC(confirm_ops.at(dst_type).count(input_index) > 0, use_node = dst_node; return true);); return false; } @@ -106,8 +105,8 @@ Status VariableFormatPass::UpdateApplyMomentumInputFormat(const ge::NodePtr &nod GE_CHECK_NOTNULL(op_desc_ptr->MutableOutputDesc(0)); NodePtr in_node = node->GetInDataAnchor(0)->GetPeerOutAnchor()->GetOwnerNode(); if (in_node != nullptr) { - string inOpType = in_node->GetType(); - if ((inOpType == VARIABLE) && (in_node->GetOpDesc() != nullptr)) { + string in_op_type = in_node->GetType(); + if ((in_op_type == VARIABLE) && (in_node->GetOpDesc() != nullptr)) { ge::Format format = in_node->GetOpDesc()->MutableOutputDesc(0)->GetFormat(); op_desc_ptr->MutableInputDesc(0)->SetFormat(format); op_desc_ptr->MutableInputDesc(0)->SetOriginFormat(format); diff --git a/src/ge/graph/passes/variable_op_pass.cc b/src/ge/graph/passes/variable_op_pass.cc index 04a0ae72..755de62d 100644 --- a/src/ge/graph/passes/variable_op_pass.cc +++ b/src/ge/graph/passes/variable_op_pass.cc @@ -75,9 +75,9 @@ Status ByPassTransNode(NodePtr &trans_node, NodePtr &ref_node) { auto prev_trans_node_out_anchor = trans_in_anchor->GetPeerOutAnchor(); if (prev_trans_node_out_anchor == nullptr) { GELOGW( - "The trans node %s does not have an input, so the ref node %s does" - " not have any inputs after bypass", - trans_node->GetName().c_str(), trans_node->GetName().c_str()); + "The trans node %s does not have an input, so the ref node %s does" + " not have any inputs after bypass", + trans_node->GetName().c_str(), trans_node->GetName().c_str()); } else { ret = GraphUtils::AddEdge(prev_trans_node_out_anchor, ref_in_anchor); if (ret != GRAPH_SUCCESS) { @@ -161,14 +161,14 @@ Status VariableOpPass::Run(ge::ComputeGraphPtr graph) { auto start_iter = fusion_road.begin(); auto end_iter = fusion_road.rbegin(); GELOGI( - "Trans variable data for %s from format %s to %s, shape %s to %s " - "data-type %s to %s, path len %zu success", - node->GetName().c_str(), TypeUtils::FormatToSerialString(start_iter->input.GetFormat()).c_str(), - TypeUtils::FormatToSerialString(end_iter->output.GetFormat()).c_str(), - formats::ShapeToString(start_iter->input.GetShape().GetDims()).c_str(), - formats::ShapeToString(end_iter->output.GetShape().GetDims()).c_str(), - TypeUtils::DataTypeToSerialString(start_iter->input.GetDataType()).c_str(), - TypeUtils::DataTypeToSerialString(end_iter->output.GetDataType()).c_str(), fusion_road.size()); + "Trans variable data for %s from format %s to %s, shape %s to %s " + "data-type %s to %s, path len %zu success", + node->GetName().c_str(), TypeUtils::FormatToSerialString(start_iter->input.GetFormat()).c_str(), + 
TypeUtils::FormatToSerialString(end_iter->output.GetFormat()).c_str(), + formats::ShapeToString(start_iter->input.GetShape().GetDims()).c_str(), + formats::ShapeToString(end_iter->output.GetShape().GetDims()).c_str(), + TypeUtils::DataTypeToSerialString(start_iter->input.GetDataType()).c_str(), + TypeUtils::DataTypeToSerialString(end_iter->output.GetDataType()).c_str(), fusion_road.size()); ret = VarManager::Instance(graph->GetSessionID())->SetTransRoad(node->GetName(), fusion_road); if (ret != SUCCESS) { @@ -222,9 +222,9 @@ Status VariableOpPass::DealFusion(const ge::NodePtr &var_node) { trans_node->GetType().c_str(), var_node->GetName().c_str()); if (trans_node->GetOutDataNodes().size() > 1) { GELOGD( - "The trans node %s type %s connecting with var-ref %s has more" - " than one output data nodes, unlink the edge between them", - trans_node->GetName().c_str(), trans_node->GetType().c_str(), ref_node->GetName().c_str()); + "The trans node %s type %s connecting with var-ref %s has more" + " than one output data nodes, unlink the edge between them", + trans_node->GetName().c_str(), trans_node->GetType().c_str(), ref_node->GetName().c_str()); if (ByPassTransNode(trans_node, ref_node) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to bypass trans node %s to ref %s", trans_node->GetName().c_str(), ref_node->GetName().c_str()); @@ -232,9 +232,9 @@ Status VariableOpPass::DealFusion(const ge::NodePtr &var_node) { } } else { GELOGD( - "The trans node %s type %s connecting with var-ref %s has only" - " one output data nodes, isolate and remove it.", - trans_node->GetName().c_str(), trans_node->GetType().c_str(), ref_node->GetName().c_str()); + "The trans node %s type %s connecting with var-ref %s has only" + " one output data nodes, isolate and remove it.", + trans_node->GetName().c_str(), trans_node->GetType().c_str(), ref_node->GetName().c_str()); if (GraphUtils::IsolateNode(trans_node, {0}) != SUCCESS) { return GE_GRAPH_VARIABLE_OP_PASS_FAILED; } @@ -272,9 +272,9 @@ Status VariableOpPass::CheckSameAndTransOp(const ge::NodePtr &var_node, bool &is } if (data_index != in_anchor->GetIdx()) { GELOGD( - "Variables only can be fusion with trans nodes, the next node %s" - " type %s index %d does not trans anything(correct index %d)", - out_node->GetName().c_str(), out_node->GetType().c_str(), in_anchor->GetIdx(), data_index); + "Variables only can be fusion with trans nodes, the next node %s" + " type %s index %d does not trans anything(correct index %d)", + out_node->GetName().c_str(), out_node->GetType().c_str(), in_anchor->GetIdx(), data_index); return SUCCESS; } @@ -304,9 +304,9 @@ Status VariableOpPass::CheckSameAndTransOp(const ge::NodePtr &var_node, bool &is } GELOGW( - "trans_op type size for var Node(%s) is over 1, Currently not" - " supported, dataTypeAndFormats is %s.", - var_node->GetName().c_str(), type_and_formats_stream.str().c_str()); + "trans_op type size for var Node(%s) is over 1, Currently not" + " supported, dataTypeAndFormats is %s.", + var_node->GetName().c_str(), type_and_formats_stream.str().c_str()); return SUCCESS; } @@ -574,8 +574,8 @@ Status VariableOpPass::RenewVarDesc(ge::ComputeGraphPtr &graph) { // renew var manager desc Status ret = SUCCESS; for (auto &node : graph->GetDirectNode()) { - bool is_var_node = (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) || - (node->GetType() == VARHANDLEOP); + bool is_var_node = + (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) || (node->GetType() == VARHANDLEOP); if (is_var_node) { if 
(!ge::VarManager::Instance(graph->GetSessionID())->IsVarExist(node->GetName())) { GELOGI("var manager does not exist var node[%s]", node->GetName().c_str()); diff --git a/src/ge/graph/passes/variable_prepare_op_pass.cc b/src/ge/graph/passes/variable_prepare_op_pass.cc index e9a63fbc..c4dca7dc 100644 --- a/src/ge/graph/passes/variable_prepare_op_pass.cc +++ b/src/ge/graph/passes/variable_prepare_op_pass.cc @@ -24,10 +24,11 @@ #include "framework/common/debug/ge_log.h" #include "common/ge/ge_util.h" #include "external/graph/graph.h" +#include "framework/common/debug/ge_log.h" +#include "graph/debug/ge_attr_define.h" #include "graph/node.h" #include "graph/utils/tensor_utils.h" - namespace ge { Status VariablePrepareOpPass::Run(ComputeGraphPtr graph) { GE_CHECK_NOTNULL(graph); @@ -176,7 +177,7 @@ Status VariablePrepareOpPass::AddVariableRef(ge::NodePtr &final_writable_node, g } // add edge final node:index ---> var_ref_node:0 graphStatus ret = - ge::GraphUtils::AddEdge(final_writable_node->GetOutDataAnchor(index), var_ref_node->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(final_writable_node->GetOutDataAnchor(index), var_ref_node->GetInDataAnchor(0)); if (ret != GRAPH_SUCCESS) { GELOGE(FAILED, "add data anchor between var_ref_node and final_writable peer_node failed"); return FAILED; @@ -202,8 +203,7 @@ ge::NodePtr VariablePrepareOpPass::CreatVariableRef(ge::NodePtr &final_writable_ return nullptr; } - OpDescPtr var_ref_op_desc = - MakeShared(var_node->GetName() + var_ref_name.str(), var_op_desc->GetType()); + OpDescPtr var_ref_op_desc = MakeShared(var_node->GetName() + var_ref_name.str(), var_op_desc->GetType()); if (var_ref_op_desc == nullptr) { GELOGE(FAILED, "var_ref opdesc is nullptr"); return nullptr; @@ -211,10 +211,10 @@ ge::NodePtr VariablePrepareOpPass::CreatVariableRef(ge::NodePtr &final_writable_ GE_IF_BOOL_EXEC(var_ref_op_desc->AddOutputDesc(var_op_desc->GetOutputDesc(0)) != SUCCESS, GELOGW("add output desc edge failed"); - return nullptr); + return nullptr); GE_IF_BOOL_EXEC(var_ref_op_desc->AddInputDesc(var_op_desc->GetOutputDesc(0)) != SUCCESS, GELOGW("add input desc edge failed"); - return nullptr); + return nullptr); NodePtr var_ref_node = var_node->GetOwnerComputeGraph()->AddNode(var_ref_op_desc); GE_IF_BOOL_EXEC(var_ref_node == nullptr, GELOGW("var_ref_node is null"); return nullptr); diff --git a/src/ge/graph/passes/variable_ref_delete_op_pass.cc b/src/ge/graph/passes/variable_ref_delete_op_pass.cc index 6a292d41..5ff01a94 100644 --- a/src/ge/graph/passes/variable_ref_delete_op_pass.cc +++ b/src/ge/graph/passes/variable_ref_delete_op_pass.cc @@ -33,7 +33,7 @@ Status VariableRefDeleteOpPass::Run(ge::ComputeGraphPtr graph) { GE_CHECK_NOTNULL(node->GetOpDesc()); std::string ref_var_src_var_name; bool is_variable_ref = (node->GetOpDesc()->GetType() == VARIABLE) && - (ge::AttrUtils::GetStr(node->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_src_var_name)); + (ge::AttrUtils::GetStr(node->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_src_var_name)); if (!is_variable_ref) { continue; } @@ -94,8 +94,8 @@ Status VariableRefDeleteOpPass::DealVariableRef(ge::ComputeGraphPtr &graph, ge:: GE_CHECK_NOTNULL(var_ref_src_var->GetOpDesc()); bool is_set_index = ge::AttrUtils::SetInt(var_ref_src_var->GetOpDesc(), REF_VAR_PRE_PEER_OUT_INDEX, index); if (is_set_str && is_set_index) { - GELOGI("[%s]: add attr [REF_VAR_SRC_VAR_NAME: %s ] ", peer_node->GetName().c_str(), ref_var_src_var_name.c_str()); - GELOGI("[%s]: add attr [ REF_VAR_PRE_PEER_OUT_INDEX: %d ]", var_ref_src_var->GetName().c_str(), 
index); + GELOGD("[%s]: add attr [REF_VAR_SRC_VAR_NAME: %s ] ", peer_node->GetName().c_str(), ref_var_src_var_name.c_str()); + GELOGD("[%s]: add attr [ REF_VAR_PRE_PEER_OUT_INDEX: %d ]", var_ref_src_var->GetName().c_str(), index); } return SUCCESS; diff --git a/src/ge/graph/preprocess/graph_preprocess.cc b/src/ge/graph/preprocess/graph_preprocess.cc index 0a438220..c3066ea7 100644 --- a/src/ge/graph/preprocess/graph_preprocess.cc +++ b/src/ge/graph/preprocess/graph_preprocess.cc @@ -15,18 +15,13 @@ */ #include "graph/preprocess/graph_preprocess.h" - #include #include #include -#include - -#include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h" -#include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" -#include "common/helper/model_helper.h" #include "common/math/math_util.h" -#include "common/op/ge_op_utils.h" #include "framework/common/debug/ge_log.h" +#include "common/op/ge_op_utils.h" +#include "common/helper/model_helper.h" #include "graph/common/transop_util.h" #include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" @@ -38,8 +33,6 @@ #include "graph/passes/assert_pass.h" #include "graph/passes/base_pass.h" #include "graph/passes/constant_folding_pass.h" -#include "graph/passes/constant_fuse_same_pass.h" -#include "graph/passes/control_trigger_pass.h" #include "graph/passes/dimension_adjust_pass.h" #include "graph/passes/dimension_compute_pass.h" #include "graph/passes/dropout_pass.h" @@ -55,6 +48,7 @@ #include "graph/passes/merge_pass.h" #include "graph/passes/net_output_pass.h" #include "graph/passes/next_iteration_pass.h" +#include "graph/passes/control_trigger_pass.h" #include "graph/passes/no_use_reshape_remove_pass.h" #include "graph/passes/placeholder_with_default_pass.h" #include "graph/passes/prevent_gradient_pass.h" @@ -74,16 +68,20 @@ #include "graph/passes/update_net_output_pass.h" #include "graph/passes/var_is_initialized_op_pass.h" #include "graph/passes/variable_prepare_op_pass.h" +#include "graph/passes/constant_fuse_same_pass.h" #include "graph/preprocess/insert_op/util_insert_aipp_op.h" #include "graph/types.h" -#include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" #include "inc/pass_manager.h" #include "init/gelib.h" +#include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" +#include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h" +#include "graph/utils/tensor_utils.h" #include "runtime/dev.h" namespace ge { namespace { + OpDescPtr CreateTensorShape(const GeTensorDesc &data_tensor) { GeTensorPtr tensor = MakeShared(); if (tensor == nullptr) { @@ -112,9 +110,9 @@ OpDescPtr CreateTensorShape(const GeTensorDesc &data_tensor) { dst_shape[i] = dst_ge_shape.GetDim(static_cast(i)); } GE_IF_BOOL_EXEC( - tensor->SetData(reinterpret_cast(dst_shape.get()), dim_cnt * sizeof(int64_t)) != GRAPH_SUCCESS, - GELOGE(INTERNAL_ERROR, "tensor set data failed"); - return nullptr;) + tensor->SetData(reinterpret_cast(dst_shape.get()), dim_cnt * sizeof(int64_t)) != GRAPH_SUCCESS, + GELOGE(INTERNAL_ERROR, "tensor set data failed"); + return nullptr;) } GELOGD("Create shape input dim [%s]", dst_ge_shape.ToString().c_str()); @@ -126,11 +124,11 @@ void AddTransNodeAttr(const std::string &node_type, const GeTensorDesc &input, c // For format transfer node, the IR definition has src/dst format attrs if (node_type == TRANSDATA) { GE_IF_BOOL_EXEC( - !AttrUtils::SetStr(op_desc, FORMAT_TRANSFER_SRC_FORMAT, TypeUtils::FormatToSerialString(input.GetFormat())), - GELOGW("SetStr 
FORMAT_TRANSFER_SRC_FORMAT failed");) + !AttrUtils::SetStr(op_desc, FORMAT_TRANSFER_SRC_FORMAT, TypeUtils::FormatToSerialString(input.GetFormat())), + GELOGW("SetStr FORMAT_TRANSFER_SRC_FORMAT failed");) GE_IF_BOOL_EXEC( - !AttrUtils::SetStr(op_desc, FORMAT_TRANSFER_DST_FORMAT, TypeUtils::FormatToSerialString(output.GetFormat())), - GELOGW("SetStr FORMAT_TRANSFER_DST_FORMAT failed");) + !AttrUtils::SetStr(op_desc, FORMAT_TRANSFER_DST_FORMAT, TypeUtils::FormatToSerialString(output.GetFormat())), + GELOGW("SetStr FORMAT_TRANSFER_DST_FORMAT failed");) } // For cast node, the IR definition has src/dst attrs if (node_type == CAST) { @@ -170,8 +168,8 @@ NodePtr CreateTransNode(const std::string &name, const std::string &node_type, c // for data dump GE_IF_BOOL_EXEC( - !AttrUtils::SetListStr(op_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector())), - GELOGW("CreateTransNode: SetListStr failed");) + !AttrUtils::SetListStr(op_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector())), + GELOGW("CreateTransNode: SetListStr failed");) // Default single input and single output auto ret = op_desc->AddInputDesc(input); @@ -442,7 +440,7 @@ NodePtr CreateCastOp(const ge::GeShape &shape, const ge::DataType input_data_typ static uint32_t transop_count = 0; std::string name = std::string("cast_node").append(std::to_string(transop_count++)); - GELOGI("create cast op:%s, input datatype:%s, out datatype:%s", name.c_str(), + GELOGI("Create cast op:%s, input datatype:%s, out datatype:%s", name.c_str(), TypeUtils::DataTypeToSerialString(input_data_type).c_str(), TypeUtils::DataTypeToSerialString(output_data_type).c_str()); @@ -508,7 +506,7 @@ NodePtr CreateTransdataNode(const ge::GeShape &in_shape, const ge::Format input_ // Does not involve multithreading. 
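CreateCastOp above and CreateTransdataNode just below both derive fresh node names from a function-local static counter, which is why the adjacent comment stresses that the code does not involve multithreading. A minimal sketch of that naming scheme (the function and prefix names here are illustrative, not GraphEngine API):

#include <cstdint>
#include <cstdio>
#include <string>

// Function-local static counter: each call yields a unique suffix, but the
// unsynchronized increment is only safe single-threaded, as the pass notes.
std::string NextTransOpName(const std::string &prefix) {
  static uint32_t transop_count = 0;
  return prefix + "_node" + std::to_string(transop_count++);
}

int main() {
  std::printf("%s\n", NextTransOpName("cast").c_str());       // cast_node0
  std::printf("%s\n", NextTransOpName("transdata").c_str());  // transdata_node1
  return 0;
}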
std::string name = std::string("transdata_node").append(std::to_string(transop_count++)); - GELOGI("create trandata op:%s, input format:%s, out format:%s", name.c_str(), + GELOGI("Create transdata op:%s, input format:%s, out format:%s", name.c_str(), TypeUtils::FormatToSerialString(input_format).c_str(), TypeUtils::FormatToSerialString(output_format).c_str()); GeTensorDesc input(in_shape, input_format, dt); @@ -594,7 +592,7 @@ Status ProcessInputNC1HWC0(NodePtr &node_ptr) { } NodePtr trans_node = - CreateTransdataNode(input->GetShape(), FORMAT_NC1HWC0, old_shape, old_format, input->GetDataType(), node_ptr); + CreateTransdataNode(input->GetShape(), FORMAT_NC1HWC0, old_shape, old_format, input->GetDataType(), node_ptr); GE_CHECK_NOTNULL(trans_node); OutDataAnchorPtr src_out = node_ptr->GetOutDataAnchor(0); InDataAnchorPtr trans_in = trans_node->GetInDataAnchor(0); @@ -611,6 +609,10 @@ GraphPrepare::GraphPrepare() : compute_graph_(nullptr) {} GraphPrepare::~GraphPrepare() {} +/** + * @param graph compute graph whose variable formats are updated via collected refs + * @return SUCCESS on success, otherwise an error status + */ Status GraphPrepare::UpdateVariableFormats(ComputeGraphPtr &graph) { GE_CHECK_NOTNULL(graph); auto var_names_to_refs = CollectVarNamesToRefs(graph); @@ -684,7 +686,7 @@ Status GraphPrepare::CheckGraph() { } Status GraphPrepare::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode) { - GELOGI("set rt_context %d, device id:%u.", static_cast<int>(mode), ge::GetContext().DeviceId()); + GELOGI("Set rt_context %d, device id:%u.", static_cast<int>(mode), ge::GetContext().DeviceId()); GE_CHK_RT_RET(rtCtxCreate(&rt_context, mode, ge::GetContext().DeviceId())); GE_CHK_RT_RET(rtCtxSetCurrent(rt_context)); RtContextUtil::GetInstance().AddrtContext(rt_context); @@ -758,6 +760,7 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) { int64_t shape_size = desc_shape * length; GE_IF_BOOL_EXEC(shape_size == 0, shape_size = static_cast<int64_t>(length)); uint32_t size = 0; + // [No need to check return value] ge::TensorUtils::GetSize(desc, size); if ((size != 0) && (shape_size != static_cast<int64_t>(size))) { GELOGE(PARAM_INVALID, "input data size =%u, shape_size =%ld.", size, shape_size); @@ -1310,7 +1313,9 @@ Status GraphPrepare::OptimizeForPreprocess() { AddNPass addn_pass; names_to_passes.emplace_back("AddNPass", &addn_pass); PrintOpPass print_pass; - names_to_passes.emplace_back("PrintOpPass", &print_pass); + if (options_.enable_print_op_pass) { + names_to_passes.emplace_back("PrintOpPass", &print_pass); + } NoUseReshapeRemovePass no_use_reshape_remove_pass; names_to_passes.emplace_back("NoUseReshapeRemovePass", &no_use_reshape_remove_pass); @@ -1360,7 +1365,7 @@ Status GraphPrepare::OptimizeForPreprocess() { (void)graph_pass.AddPass(new ControlTriggerPass); (void)graph_pass.AddPass(new SwitchOpPass); (void)graph_pass.AddPass(new HcclMemcpyPass); - (void)graph_pass.AddPass(new FlowCtrlPass); + GE_IF_BOOL_EXEC(options_.train_graph_flag, (void)graph_pass.AddPass(new FlowCtrlPass);) (void)graph_pass.AddPass(new EndGraphPass); } catch (std::bad_alloc &e) { GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs."); diff --git a/src/ge/graph/preprocess/graph_preprocess.h b/src/ge/graph/preprocess/graph_preprocess.h index 00a3f22b..ee1eb0bc 100644 --- a/src/ge/graph/preprocess/graph_preprocess.h +++ b/src/ge/graph/preprocess/graph_preprocess.h @@ -16,13 +16,11 @@ #ifndef GE_GRAPH_PREPROCESS_GRAPH_PREPROCESS_H_ #define GE_GRAPH_PREPROCESS_GRAPH_PREPROCESS_H_ - #include #include #include #include #include - #include "common/debug/log.h" #include "common/debug/memory_dumper.h" #include 
"common/model_parser/base.h" @@ -39,6 +37,7 @@ #include "omg/omg_inner_types.h" #include "runtime/context.h" +/*lint -e148*/ namespace ge { class GraphPrepare { public: @@ -46,8 +45,8 @@ class GraphPrepare { virtual ~GraphPrepare(); GraphPrepare(const GraphPrepare &in) = delete; GraphPrepare &operator=(const GraphPrepare &in) = delete; - Status Prepare(ConstGraphPtr graph, const std::vector &user_input, - ge::ComputeGraphPtr &compute_graph, uint64_t session_id = 0); + Status Prepare(ConstGraphPtr graph, const std::vector &user_input, ge::ComputeGraphPtr &compute_graph, + uint64_t session_id = 0); void SetOptions(const GraphManagerOptions &options); private: @@ -66,7 +65,7 @@ class GraphPrepare { Status OptimizeForDataAfterInfershape(); Status UpdateVariableFormats(ComputeGraphPtr &graph); Status FormatAndShapeProcess(); - Status ResourcePairProcess(const std::string& action); + Status ResourcePairProcess(const std::string &action); void ProcessCCEFormat(); Status OptimizeBeforeInfershape(); void SaveOriginalGraphToOmModel(); @@ -74,4 +73,5 @@ class GraphPrepare { GraphManagerOptions options_; }; } // namespace ge +/*lint +e148*/ #endif // GE_GRAPH_PREPROCESS_GRAPH_PREPROCESS_H_ diff --git a/src/ge/graph/preprocess/insert_op/base_insert_op.cc b/src/ge/graph/preprocess/insert_op/base_insert_op.cc index db8403b2..37bbd48b 100644 --- a/src/ge/graph/preprocess/insert_op/base_insert_op.cc +++ b/src/ge/graph/preprocess/insert_op/base_insert_op.cc @@ -15,25 +15,23 @@ */ #include "graph/preprocess/insert_op/base_insert_op.h" - #include #include - -#include "common/ge/ge_util.h" -#include "common/math/math_util.h" +#include "external/graph/operator_factory.h" +#include "external/graph/operator.h" +#include "framework/common/debug/ge_log.h" #include "common/op/attr_value_util.h" #include "common/op/ge_op_utils.h" #include "common/types.h" #include "common/util.h" -#include "external/graph/operator.h" -#include "external/graph/operator_factory.h" -#include "framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" -#include "graph/debug/ge_attr_define.h" #include "graph/op_desc.h" #include "graph/utils/graph_utils.h" #include "graph/utils/op_desc_utils.h" #include "graph/utils/tensor_utils.h" +#include "common/ge/ge_util.h" +#include "graph/debug/ge_attr_define.h" +#include "common/math/math_util.h" namespace ge { static const char *const kAippConfigPath = "aipp_config_route"; @@ -42,7 +40,7 @@ static const uint32_t kImageRatioYuv420SpU8Div = 2; static const uint32_t kImageRatioXrgb8888U8 = 4; static const uint32_t kImageRatioRgb888U8 = 3; -Status InsertOpBase::InsertAippToGraph(ComputeGraphPtr &graph, std::string &aipp_config_path, +Status InsertOpBase::InsertAippToGraph(ComputeGraphPtr &graph, std::string &aippConfigPath, ge::NodePtr &inserted_aipp_node) { GE_CHECK_NOTNULL(graph); NodePtr target_input = nullptr; @@ -58,7 +56,7 @@ Status InsertOpBase::InsertAippToGraph(ComputeGraphPtr &graph, std::string &aipp GELOGW("InsertAippToGraph: GetNamedAttrs failed"); return FAILED) - auto opdesc_src_data = target_input->GetOpDesc()->GetOutputDesc(0); + auto opdesc_src_data = target_input->GetOpDesc()->GetOutputDesc(0); // [Cascade pointer] if (opdesc_src_data.GetDataType() != DT_FLOAT) { GELOGW("The datatype of data node %s is not FP32", target_input->GetName().c_str()); opdesc_src_data.SetDataType(DT_FLOAT); @@ -86,14 +84,14 @@ Status InsertOpBase::InsertAippToGraph(ComputeGraphPtr &graph, std::string &aipp return FAILED; } } - GE_IF_BOOL_EXEC(!AttrUtils::SetStr(aipp_opdesc_ptr, 
kAippConfigPath, aipp_config_path), + GE_IF_BOOL_EXEC(!AttrUtils::SetStr(aipp_opdesc_ptr, kAippConfigPath, aippConfigPath), GELOGW("SetStr kAippConfigPath failed");) - GELOGI("Aipp config path is %s", aipp_config_path.c_str()); + GELOGI("Aipp config path is %s", aippConfigPath.c_str()); // for data dump - GE_IF_BOOL_EXEC(!AttrUtils::SetListStr(aipp_opdesc_ptr, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, - std::move(std::vector())), - GELOGW("InsertAippToGraph: SetListStr failed");) + GE_IF_BOOL_EXEC( + !AttrUtils::SetListStr(aipp_opdesc_ptr, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector())), + GELOGW("InsertAippToGraph: SetListStr failed");) NodePtr insert_op = graph->AddNode(aipp_opdesc_ptr); GE_CHECK_NOTNULL(insert_op); @@ -129,11 +127,11 @@ uint32_t InsertOpBase::AdjustDataSize(const GeTensorDesc &input_desc, unique_ptr const uint32_t h = (input_desc.GetFormat() == ge::FORMAT_NHWC) ? NHWC_DIM_H : NCHW_DIM_H; const uint32_t w = (input_desc.GetFormat() == ge::FORMAT_NHWC) ? NHWC_DIM_W : NCHW_DIM_W; const uint32_t shape_h = - aipp_params->src_image_size_h() ? aipp_params->src_image_size_h() : input_desc.GetShape().GetDim(h); + aipp_params->src_image_size_h() ? aipp_params->src_image_size_h() : input_desc.GetShape().GetDim(h); FMK_UINT32_MULCHECK(size, shape_h); size *= shape_h; const uint32_t shape_w = - aipp_params->src_image_size_w() ? aipp_params->src_image_size_w() : input_desc.GetShape().GetDim(w); + aipp_params->src_image_size_w() ? aipp_params->src_image_size_w() : input_desc.GetShape().GetDim(w); FMK_UINT32_MULCHECK(size, shape_w); size *= shape_w; if (aipp_params->input_format() == domi::AippOpParams::YUV420SP_U8) { diff --git a/src/ge/graph/preprocess/insert_op/base_insert_op.h b/src/ge/graph/preprocess/insert_op/base_insert_op.h index db826ece..57a39867 100644 --- a/src/ge/graph/preprocess/insert_op/base_insert_op.h +++ b/src/ge/graph/preprocess/insert_op/base_insert_op.h @@ -21,7 +21,6 @@ #include #include #include - #include "common/fmk_error_codes.h" #include "common/types.h" #include "framework/common/ge_inner_error_codes.h" @@ -101,4 +100,3 @@ class InsertOpBase { } // namespace ge #endif // GE_GRAPH_PREPROCESS_INSERT_OP_BASE_INSERT_OP_H_ - diff --git a/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc index 129a27f2..026aff40 100644 --- a/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -15,28 +15,25 @@ */ #include "graph/preprocess/insert_op/ge_aipp_op.h" - #include #include #include #include - #include "framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" #include "common/util.h" #include "graph/optimize/common/params.h" - -#define SAVE_AIPP_ATTR(KEY, SAVE_TYPE) \ - do { \ +#define SAVE_AIPP_ATTR(KEY, SAVE_TYPE) \ + do { \ (void)aipp_attrs.SetAttr(#KEY, GeAttrValue::CreateFrom(aipp_params_->KEY())); \ } while (0) -#define SAVE_AIPP_ATTR_LIST(KEY, SAVE_TYPE) \ - do { \ - if (aipp_params_->KEY##_size() > 0) { \ +#define SAVE_AIPP_ATTR_LIST(KEY, SAVE_TYPE) \ + do { \ + if (aipp_params_->KEY##_size() > 0) { \ (void)aipp_attrs.SetAttr(#KEY, GeAttrValue::CreateFrom(aipp_params_->KEY(0))); \ - } \ + } \ } while (0) namespace { @@ -107,10 +104,9 @@ Status AippOp::GetTargetPosition(ComputeGraphPtr graph, NodePtr &target_input, const bool is_edge_configed = aipp_params_->input_edge_idx_size() > 0; - GE_CHK_BOOL_RET_STATUS( - !is_edge_configed || aipp_params_->input_edge_idx(0) < target_input->GetOutDataNodes().size(), PARAM_INVALID, - 
"input_edge_idx %u should smaller than out edge size of target input %zu ", aipp_params_->input_edge_idx(0), - target_input->GetOutDataNodes().size()); + GE_CHK_BOOL_RET_STATUS(!is_edge_configed || aipp_params_->input_edge_idx(0) < target_input->GetOutDataNodes().size(), + PARAM_INVALID, "input_edge_idx %u should smaller than out edge size of target input %zu ", + aipp_params_->input_edge_idx(0), target_input->GetOutDataNodes().size()); uint32_t i = 0; for (OutDataAnchorPtr &src_out : target_input->GetAllOutDataAnchors()) { @@ -158,16 +154,15 @@ Status AippOp::SetDefaultParams() { GELOGI("parse aipp params:min_chn_0:%f, min_chn_1:%f, min_chn_2:%f.", aipp_params_->min_chn_0(), aipp_params_->min_chn_1(), aipp_params_->min_chn_2()); - GE_IF_BOOL_EXEC(!aipp_params_->crop(), aipp_params_->set_load_start_pos_h(0); - aipp_params_->set_load_start_pos_w(0); aipp_params_->set_crop_size_h(0); - aipp_params_->set_crop_size_w(0);); + GE_IF_BOOL_EXEC(!aipp_params_->crop(), aipp_params_->set_load_start_pos_h(0); aipp_params_->set_load_start_pos_w(0); + aipp_params_->set_crop_size_h(0); aipp_params_->set_crop_size_w(0);); GE_IF_BOOL_EXEC(!aipp_params_->resize(), aipp_params_->set_resize_output_h(0); - aipp_params_->set_resize_output_w(0);); + aipp_params_->set_resize_output_w(0);); GE_IF_BOOL_EXEC(!aipp_params_->padding(), aipp_params_->set_left_padding_size(0); - aipp_params_->set_right_padding_size(0); aipp_params_->set_top_padding_size(0); - aipp_params_->set_bottom_padding_size(0);); + aipp_params_->set_right_padding_size(0); aipp_params_->set_top_padding_size(0); + aipp_params_->set_bottom_padding_size(0);); } return SUCCESS; @@ -318,7 +313,8 @@ Status AippOp::GenerateOpDesc(OpDescPtr op_desc) { ConvertParamToAttr(aipp_attrs); GE_IF_BOOL_EXEC(!AttrUtils::SetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attrs), - GELOGE(FAILED, "failed to set ATTR_NAME_AIPP"); return FAILED); + GELOGE(FAILED, "failed to set ATTR_NAME_AIPP"); + return FAILED); return SUCCESS; } diff --git a/src/ge/graph/preprocess/insert_op/ge_aipp_op.h b/src/ge/graph/preprocess/insert_op/ge_aipp_op.h index 0e813d95..0b288971 100644 --- a/src/ge/graph/preprocess/insert_op/ge_aipp_op.h +++ b/src/ge/graph/preprocess/insert_op/ge_aipp_op.h @@ -19,7 +19,6 @@ #include #include - #include "common/op/attr_value_util.h" #include "graph/preprocess/insert_op/base_insert_op.h" #include "proto/insert_op.pb.h" @@ -49,7 +48,6 @@ class AippOp : public InsertOpBase { /// @ingroup domi_omg /// @brief Generate Op Desc /// - Status GenerateOpDesc(ge::OpDescPtr op_desc) override; /// @@ -65,8 +63,8 @@ class AippOp : public InsertOpBase { domi::AippOpParams::AippMode GetAippMode() override; private: - AippOp& operator=(const AippOp& aipp_op); - AippOp(const AippOp& aipp_op); + AippOp &operator=(const AippOp &aipp_op); + AippOp(const AippOp &aipp_op); /// /// @ingroup domi_omg @@ -77,10 +75,9 @@ class AippOp : public InsertOpBase { void SetDtcDefaultValue(); - domi::AippOpParams *aipp_params_; + domi::AippOpParams *aipp_params_ = nullptr; ge::NodePtr aipp_node_ = nullptr; }; } // namespace ge #endif // GE_GRAPH_PREPROCESS_INSERT_OP_GE_AIPP_OP_H_ - diff --git a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index c1f1f344..668ee1e4 100644 --- a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -15,24 +15,22 @@ */ #include "graph/preprocess/insert_op/util_insert_aipp_op.h" - #include #include - -#include 
"common/dynamic_aipp.h" #include "common/ge/ge_util.h" #include "common/op/ge_op_utils.h" #include "common/util.h" #include "framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" #include "framework/omg/omg_inner_types.h" -#include "graph/debug/ge_attr_define.h" #include "graph/preprocess/insert_op/ge_aipp_op.h" +#include "graph/debug/ge_attr_define.h" #include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/op_desc_utils.h" #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" +#include "inc/common/dynamic_aipp.h" using domi::AippOpParams; @@ -132,7 +130,7 @@ Status InsertNewOpUtil::Parse(const char *conf_path) { return SUCCESS; } -Status InsertNewOpUtil::AddAippInputData(const ge::NodePtr &aipp_node, const ge::ComputeGraphPtr &graph) { +Status InsertNewOpUtil::AddAippInputData(ge::NodePtr aipp_node, ge::ComputeGraphPtr graph) { GELOGD("Enter add aipp data node process!"); static int index = 0; @@ -164,7 +162,7 @@ Status InsertNewOpUtil::AddAippInputData(const ge::NodePtr &aipp_node, const ge: auto batch_count = nchw_dims[NCHW_DIM_N]; // new add aipp_data ops for dynamic aipp param input OpDescPtr op_desc_ptr_data = - ge::MakeShared(std::string("aipp_data_").append(std::to_string(index++)), AIPPDATA); + ge::MakeShared(std::string("aipp_data_").append(std::to_string(index++)), AIPPDATA); // calc max size if (batch_count <= 0 || batch_count > kMaxBatchCountNum) { @@ -197,7 +195,6 @@ Status InsertNewOpUtil::AddAippInputData(const ge::NodePtr &aipp_node, const ge: return INTERNAL_ERROR; } // add node desc for aipp node - GE_CHECK_NOTNULL(aipp_node->GetOpDesc()); auto stat3 = aipp_node->GetOpDesc()->UpdateInputDesc(1, output_tensor); if (stat1 != SUCCESS || stat2 != SUCCESS || stat3 != SUCCESS) { GELOGE(INTERNAL_ERROR, "node process desc failed!"); @@ -272,11 +269,11 @@ Status InsertNewOpUtil::CheckPositionNotRepeat() { GE_IF_BOOL_EXEC(item->related_input_rank() != another_item->related_input_rank(), continue;); GE_IF_BOOL_EXEC( - item->input_edge_idx_size() == 0 || another_item->input_edge_idx_size() == 0 || - item->input_edge_idx(0) == another_item->input_edge_idx(0), - GELOGE(PARAM_INVALID, - "Can not insert aipp op to the same position! please check related_input_rank and input_edge_idx."); - return PARAM_INVALID;); + item->input_edge_idx_size() == 0 || another_item->input_edge_idx_size() == 0 || + item->input_edge_idx(0) == another_item->input_edge_idx(0), + GELOGE(PARAM_INVALID, + "Can not insert aipp op to the same postion! 
please check related_input_rank and input_edge_idx."); + return PARAM_INVALID;); } } @@ -318,39 +315,39 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { GE_CHK_BOOL_RET_STATUS(aippMode == aippParams->aipp_mode(), PARAM_INVALID, "The aipp_mode of all aipp_op must be the same");); - GE_IF_BOOL_EXEC(aippNodes.size() > 1, for (decltype(aippNodes)::size_type i = 1; i < aippNodes.size(); i++) { - std::unique_ptr currAippParam(new (std::nothrow) domi::AippOpParams()); - GE_CHECK_NOTNULL(currAippParam); - GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i])); - - GE_CHK_BOOL_RET_STATUS(aippMode == currAippParam->aipp_mode(), PARAM_INVALID, - "The aipp_mode of all aipp_op must be the same"); - if (aippMode == domi::AippOpParams::static_) { - GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, - "The input_format of all aipp_ops after one Data should be the same"); - GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_w() == currAippParam->src_image_size_w(), PARAM_INVALID, - "The src_image_size_w of all aipp_ops after one Data should be the same"); - GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_h() == currAippParam->src_image_size_h(), PARAM_INVALID, - "The src_image_size_h of all aipp_ops after one Data should be the same"); - } else { - GE_CHK_BOOL_RET_STATUS(aippParams->max_src_image_size() == currAippParam->max_src_image_size(), PARAM_INVALID, - "The max_src_image_size of all aipp_ops after one Data should be the same"); - } - }); + GE_IF_BOOL_EXEC( + aippNodes.size() > 1, for (decltype(aippNodes)::size_type i = 1; i < aippNodes.size(); i++) { + std::unique_ptr currAippParam(new (std::nothrow) domi::AippOpParams()); + GE_CHECK_NOTNULL(currAippParam); + GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i])); + + GE_CHK_BOOL_RET_STATUS(aippMode == currAippParam->aipp_mode(), PARAM_INVALID, + "The aipp_mode of all aipp_op must be the same"); + if (aippMode == domi::AippOpParams::static_) { + GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, + "The input_format of all aipp_ops after one Data should be the same"); + GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_w() == currAippParam->src_image_size_w(), PARAM_INVALID, + "The src_image_size_w of all aipp_ops after one Data should be the same"); + GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_h() == currAippParam->src_image_size_h(), PARAM_INVALID, + "The src_image_size_h of all aipp_ops after one Data should be the same"); + } else { + GE_CHK_BOOL_RET_STATUS(aippParams->max_src_image_size() == currAippParam->max_src_image_size(), PARAM_INVALID, + "The max_src_image_size of all aipp_ops after one Data should be the same"); + } + }); } return SUCCESS; } -Status InsertNewOpUtil::GetAippParams(const std::unique_ptr &aipp_params, - const NodePtr &aipp_node) { +Status InsertNewOpUtil::GetAippParams(const std::unique_ptr &aippParams, const NodePtr &aipp_node) { GE_CHECK_NOTNULL(aipp_node); ge::GeAttrValue::NamedAttrs aipp_attr; const OpDescPtr tmpOpPtr = aipp_node->GetOpDesc(); GE_CHECK_NOTNULL(tmpOpPtr); GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(tmpOpPtr, ATTR_NAME_AIPP, aipp_attr), FAILED, "Aipp node should contain param aipp!"); - GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed"); + GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aippParams.get()), "get aipp params failed"); return SUCCESS; } @@ -358,13 +355,13 @@ Status InsertNewOpUtil::GetAippParams(const 
std::unique_ptr Status InsertNewOpUtil::AddMultiShapeInputData(const ge::ComputeGraphPtr &graph) { GE_CHECK_NOTNULL(graph); for (auto &node : graph->GetDirectNode()) { - GE_CHECK_NOTNULL(node->GetOpDesc()); + GE_CHECK_NOTNULL(node); if (node->GetOpDesc()->GetType() != MULTISHAPE) { continue; } GE_CHK_BOOL_RET_STATUS(node->GetInDataNodes().size() == 1, FAILED, "multi_shape node should follow one data node, but size of input edges is %zu", node->GetInDataNodes().size()); NodePtr dataNode = node->GetInDataNodes().at(0); diff --git a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h index e6dc28ce..689e66e4 100644 --- a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h +++ b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h @@ -20,13 +20,12 @@ #include #include #include - #include "graph/compute_graph.h" #include "graph/preprocess/insert_op/base_insert_op.h" #include "proto/insert_op.pb.h" namespace ge { -enum AippType { kOldType, kNewType }; +enum AippType { OLD_TYPE, NEW_TYPE }; class InsertNewOpUtil { public: @@ -41,7 +40,7 @@ class InsertNewOpUtil { Status InsertNewOps(const ge::ComputeGraphPtr &graph); - Status InsertAippOps(ge::ComputeGraphPtr &graph, std::string &aipp_config_path); + Status InsertAippOps(ge::ComputeGraphPtr &graph, std::string &aippConfigPath); void ClearNewOps(); @@ -50,13 +49,13 @@ class InsertNewOpUtil { Status AddMultiShapeInputData(const ge::ComputeGraphPtr &graph); - Status GetAippParams(const std::unique_ptr &aipp_params, const ge::NodePtr &aipp_node); + Status GetAippParams(const std::unique_ptr &aippParams, const ge::NodePtr &aipp_node); Status CheckGraph(const ge::ComputeGraphPtr &graph); InsertNewOpUtil() {} - Status AddAippInputData(const ge::NodePtr &aipp_node, const ge::ComputeGraphPtr &graph); + Status AddAippInputData(ge::NodePtr aipp_node, ge::ComputeGraphPtr graph); ~InsertNewOpUtil() = default; diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.cc b/src/ge/graph/preprocess/multi_batch_copy_graph.cc index c7ab6fef..3a4137ea 100644 --- a/src/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/src/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -16,22 +16,22 @@ #include "graph/preprocess/multi_batch_copy_graph.h" -#include -#include #include +#include +#include -#include "common/formats/utils/formats_trans_utils.h" -#include "common/ge/ge_util.h" -#include "framework/common/debug/ge_log.h" -#include "framework/common/ge_inner_error_codes.h" -#include "framework/common/string_util.h" -#include "framework/common/types.h" -#include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" -#include "graph/passes/prune_pass.h" -#include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/node_utils.h" +#include "graph/utils/attr_utils.h" +#include "graph/debug/ge_attr_define.h" +#include "framework/common/ge_inner_error_codes.h" +#include "framework/common/types.h" +#include "framework/common/string_util.h" +#include "framework/common/debug/ge_log.h" +#include "common/ge/ge_util.h" +#include "common/formats/utils/formats_trans_utils.h" +#include "graph/passes/prune_pass.h" namespace ge { namespace multibatch { @@ -44,9 +44,7 @@ const int kMergeDataOutIndex = 0; const size_t kMaxShapesCount = 16; const size_t kMinShapesCount = 2; -inline bool IsDataLikeType(const std::string &node_type) { - return (node_type == DATA) || (node_type == AIPP); -} +inline bool 
IsDataLikeType(const std::string &node_type) { return (node_type == DATA) || (node_type == AIPP); } NodePtr InsertMergeNodeToGraph(const std::string &name, size_t input_num, const ComputeGraphPtr &graph) { OpDescPtr desc = MakeShared(); @@ -630,16 +628,9 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &data) { switchn_desc->SetType(SWITCHN); GeTensorDesc tensor(NodeUtils::GetOutputDesc(*data, kDataOutIndex)); - if (switchn_desc->AddInputDesc(tensor) != GRAPH_SUCCESS) { // data - GELOGE(FAILED, "Failed to add inpit desc."); - return FAILED; - } - + switchn_desc->AddInputDesc(tensor); // data GeTensorDesc pred_tensor; - if (switchn_desc->AddInputDesc(pred_tensor) != GRAPH_SUCCESS) { // pred - GELOGE(FAILED, "Failed to add inpit desc."); - return FAILED; - } + switchn_desc->AddInputDesc(pred_tensor); // pred for (size_t i = 0; i < shapes_.size(); ++i) { auto shape = data_shape; auto ret = CalcShape(shapes_.at(i), shape); @@ -653,10 +644,7 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &data) { GELOGE(INTERNAL_ERROR, "Failed to add attr value on output %zu tensor", i); return INTERNAL_ERROR; } - if (switchn_desc->AddOutputDesc(tensor) != GRAPH_SUCCESS) { // pred - GELOGE(FAILED, "Failed to add inpit desc."); - return FAILED; - } + switchn_desc->AddOutputDesc(tensor); GELOGD("The SwitchN %s output index %zu, shape %s", switchn_desc->GetName().c_str(), i, shape.ToString().c_str()); } @@ -745,7 +733,7 @@ Status MultiBatchGraphCopyer::LinkEdges() { Status MultiBatchGraphCopyer::LinkDataToSwitchN(const NodePtr &data) { auto switchn = data_nodes_to_switchn_[data.get()]; auto ret = - GraphUtils::AddEdge(shape_data_->GetOutDataAnchor(kDataOutIndex), switchn->GetInDataAnchor(kSwitchNPredIndex)); + GraphUtils::AddEdge(shape_data_->GetOutDataAnchor(kDataOutIndex), switchn->GetInDataAnchor(kSwitchNPredIndex)); if (ret != GRAPH_SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to link shape data %s to switchn %s", shape_data_->GetName().c_str(), switchn->GetName().c_str()); diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.h b/src/ge/graph/preprocess/multi_batch_copy_graph.h index ca0fe828..7421469e 100644 --- a/src/ge/graph/preprocess/multi_batch_copy_graph.h +++ b/src/ge/graph/preprocess/multi_batch_copy_graph.h @@ -16,12 +16,12 @@ #ifndef GE_GRAPH_PREPROCESS_MULTI_BATCH_COPY_GRAPH_H_ #define GE_GRAPH_PREPROCESS_MULTI_BATCH_COPY_GRAPH_H_ - #include #include #include #include "external/ge/ge_api_error_codes.h" + #include "graph/compute_graph.h" namespace ge { @@ -39,9 +39,7 @@ class MultiBatchGraphCopyer { explicit MultiBatchGraphCopyer(ComputeGraphPtr &graph) : graph_(graph) {} ~MultiBatchGraphCopyer() = default; - void AddShape(const std::vector &shape) { - shapes_.emplace_back(shape); - } + void AddShape(const std::vector &shape) { shapes_.emplace_back(shape); } Status CopyGraph(); @@ -57,20 +55,19 @@ class MultiBatchGraphCopyer { Status UpdateMaxShapeToData(const NodePtr &data); Status InsertMergeForEdgeNode(const NodePtr &node); - - /// - /// Insert a merge node for src node `node` on output index `index`. The merge node will be used to merge all nodes - /// in batch-branch to one output to the node out of the batch-branch. 
- /// Cond 1: If the `index` is -1, then the src node link a data edge(at output 0) to the merge node, - /// Cond 2: In condition 1, if the src node does not have any data output, we create a const node after it, - /// the result like this: - /// src_node ---------> const_for_src_node --------> merge - /// control data - /// Cond 3: If the src node is a data-like node, the SwitchN after it will be link to the merge node. - /// @param node - /// @param index - /// @return - /// + /** + * Insert a merge node for src node `node` on output index `index`. The merge node will be used to merge all nodes + * in batch-branch to one output to the node out of the batch-branch. + * Cond 1: If the `index` is -1, then the src node link a data edge(at output 0) to the merge node, + * Cond 2: In condition 1, if the src node does not have any data output, we create a const node after it, + * the result like this: + * src_node ---------> const_for_src_node --------> merge + * control data + * Cond 3: If the src node is a data-like node, the SwitchN after it will be link to the merge node. + * @param node + * @param index + * @return + */ NodePtr InsertMergeNode(const NodePtr &node, int index); Status CopyNodeInBatchBranch(const NodePtr &node); diff --git a/src/ge/init/gelib.cc b/src/ge/init/gelib.cc index c8852346..4fa2664f 100644 --- a/src/ge/init/gelib.cc +++ b/src/ge/init/gelib.cc @@ -15,16 +15,13 @@ */ #include "init/gelib.h" - #include #include - #include #include #include #include #include - #include "framework/common/debug/ge_log.h" #include "common/ge/plugin_manager.h" #include "common/ge/ge_util.h" @@ -35,8 +32,7 @@ #include "graph/ge_context.h" #include "graph/ge_global_options.h" #include "ge/ge_api_types.h" -#include "cce/aicpu_engine.h" -#include "cce/fwk_adpt_struct.h" +#include #include "graph/load/new_model_manager/model_manager.h" #include "omm/csa_interact.h" #include "common/properties_manager.h" @@ -171,10 +167,9 @@ void GELib::InitOptions(const map &options) { if (iter != options.end()) { this->options_.device_id = static_cast(std::strtol(iter->second.c_str(), nullptr, kDecimal)); } - this->options_.job_id = 0; iter = options.find(OPTION_EXEC_JOB_ID); if (iter != options.end()) { - this->options_.job_id = std::strtoll(iter->second.c_str(), nullptr, kDecimal); + this->options_.job_id = iter->second.c_str(); } this->options_.isUseHcom = false; iter = options.find(OPTION_EXEC_IS_USEHCOM); @@ -209,7 +204,7 @@ void GELib::InitOptions(const map &options) { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOptions(Options &options) { GELOGI("Training init GELib. 
session Id:%ld, device id :%d ", options.session_id, options.device_id); - GEEVENT("System init with options begin, job id %ld", options.job_id); + GEEVENT("System init with options begin, job id %s", options.job_id.c_str()); std::lock_guard lock(status_mutex_); GE_IF_BOOL_EXEC(is_system_inited && !is_shutdown, GELOGW("System init with options is already inited and not shutdown."); @@ -218,7 +213,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOpt // profiling init if (ProfilingManager::Instance().Init(options) != SUCCESS) { - GELOGW("Profiling init failed."); + GELOGW("Profiling init failed."); } std::vector mem_type; @@ -230,7 +225,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOpt } // Update CSA file - CsaInteract::GetInstance().Init(options.device_id, options.job_id); + CsaInteract::GetInstance().Init(options.device_id, GetContext().TraceId()); Status ret = CsaInteract::GetInstance().WriteJobState(JOBSTATE_RUNNING, JOBSUBSTATE_ENV_INIT); GE_LOGE_IF(ret != SUCCESS, "write job state failed, ret:%u", ret); options.physical_device_id = options.device_id; @@ -248,7 +243,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOpt GetContext().SetCtxDeviceId(dev_logic_index); GE_CHK_RT_RET(rtSetDevice(options.device_id)); - cce::cceSysInit(); // In the scenario that the automatic add fusion is set, but there is no cleanaddr operator, // maybe need to check it is_system_inited = true; @@ -303,7 +297,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithout GELOGW("System init without options is already inited, don't need to init again."); return SUCCESS; } - cce::cceSysInit(); is_inited = true; GELOGI("Inference init GELib success."); diff --git a/src/ge/init/gelib.h b/src/ge/init/gelib.h index b0198c1d..06cb07ca 100644 --- a/src/ge/init/gelib.h +++ b/src/ge/init/gelib.h @@ -16,20 +16,18 @@ #ifndef GE_INIT_GELIB_H_ #define GE_INIT_GELIB_H_ - #include #include #include #include - #include "engine_manager/dnnengine_manager.h" #include "opskernel_manager/ops_kernel_manager.h" #include "session/session_manager.h" #include "common/ge_inner_error_codes.h" #include "common/ge_types.h" -using std::string; using std::map; +using std::string; using std::vector; namespace ge { diff --git a/src/ge/model/ge_model.cc b/src/ge/model/ge_model.cc index f9b80eee..36b564b8 100644 --- a/src/ge/model/ge_model.cc +++ b/src/ge/model/ge_model.cc @@ -15,9 +15,7 @@ */ #include "model/ge_model.h" - #include - #include "common/debug/log.h" #include "graph/debug/ge_attr_define.h" #include "graph/utils/attr_utils.h" @@ -58,9 +56,7 @@ void GeModel::SetGraph(const Graph &graph) { this->graph_ = graph; } void GeModel::SetModelTaskDef(const std::shared_ptr &task) { this->task_ = task; } -void GeModel::SetTBEKernelStore(const TBEKernelStore &tbe_kernal_store) { - this->tbe_kernal_store_ = tbe_kernal_store; -} +void GeModel::SetTBEKernelStore(const TBEKernelStore &tbe_kernal_store) { this->tbe_kernal_store_ = tbe_kernal_store; } void GeModel::SetWeight(const Buffer &weights_buffer) { this->weights_buffer_ = weights_buffer; } diff --git a/src/ge/model/ge_model.h b/src/ge/model/ge_model.h index 7c8581fb..ab779d03 100644 --- a/src/ge/model/ge_model.h +++ b/src/ge/model/ge_model.h @@ -17,10 +17,10 @@ #ifndef GE_MODEL_GE_MODEL_H_ #define GE_MODEL_GE_MODEL_H_ +#include #include #include #include - #include "common/tbe_kernel_store.h" #include "framework/common/debug/log.h" #include 
"framework/common/fmk_error_codes.h" @@ -61,9 +61,9 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeModel : public AttrHolder ProtoAttrMapHelper MutableAttrMap() override; - using AttrHolder::SetAttr; - using AttrHolder::GetAllAttrs; using AttrHolder::GetAllAttrNames; + using AttrHolder::GetAllAttrs; + using AttrHolder::SetAttr; void SetModelId(uint32_t model_id) { model_id_ = model_id; } uint32_t GetModelId() const { return model_id_; } @@ -74,12 +74,12 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeModel : public AttrHolder private: void Init(); - ProtoAttrMapHelper attrs_; + ProtoAttrMapHelper attrs_; /*lint !e148*/ - Graph graph_; - std::shared_ptr task_; + Graph graph_; /*lint !e148*/ + std::shared_ptr task_; /*lint !e148*/ TBEKernelStore tbe_kernal_store_; - Buffer weights_buffer_; + Buffer weights_buffer_; /*lint !e148*/ std::string name_; uint32_t version_ = {0}; diff --git a/src/ge/opskernel_manager/ops_kernel_manager.cc b/src/ge/opskernel_manager/ops_kernel_manager.cc old mode 100755 new mode 100644 index c07fd51b..5c18b213 --- a/src/ge/opskernel_manager/ops_kernel_manager.cc +++ b/src/ge/opskernel_manager/ops_kernel_manager.cc @@ -30,7 +30,7 @@ const char *const kInitialize = "Initialize"; const char *const kGetOpsKernelInfoStores = "GetOpsKernelInfoStores"; const char *const kGetGraphOptimizerObjs = "GetGraphOptimizerObjs"; const char *const kFinalize = "Finalize"; -} +} // namespace namespace ge { OpsKernelManager::OpsKernelManager() @@ -69,10 +69,11 @@ Status OpsKernelManager::Initialize(const map &options_const) { ret = plugin_manager_.LoadSo(extern_engine_path, func_check_list); if (ret == SUCCESS) { initialize_ = options; - Status rst0 = plugin_manager_.InvokeAll&, Status>(kInitialize, initialize_); + Status rst0 = plugin_manager_.InvokeAll &, Status>(kInitialize, initialize_); Status rst1 = - plugin_manager_.InvokeAll&>(kGetOpsKernelInfoStores, ops_kernel_store_); - Status rst2 = plugin_manager_.InvokeAll&>(kGetGraphOptimizerObjs, graph_optimizers_); + plugin_manager_.InvokeAll &>(kGetOpsKernelInfoStores, ops_kernel_store_); + Status rst2 = + plugin_manager_.InvokeAll &>(kGetGraphOptimizerObjs, graph_optimizers_); if ((rst0 != SUCCESS) || (rst1 != SUCCESS) || (rst2 != SUCCESS)) { GELOGE(GE_OPS_GET_NO_VALID_SO); return GE_OPS_GET_NO_VALID_SO; @@ -109,10 +110,8 @@ void OpsKernelManager::GetExternalEnginePath(std::string &extern_engine_path) { std::string path_base = PluginManager::GetPath(); std::string so_path = "plugin/opskernel/"; std::string path = path_base + so_path; - extern_engine_path = (path + "libfe.so" + ":") + - (path + "libge_local_engine.so" + ":") + - (path + "librts_engine.so" + ":") + - (path + "libaicpu_engine.so" + ":") + + extern_engine_path = (path + "libfe.so" + ":") + (path + "libge_local_engine.so" + ":") + + (path + "librts_engine.so" + ":") + (path + "libaicpu_engine.so" + ":") + (path_base + "libhccl.so"); } @@ -363,10 +362,8 @@ void OpsKernelManager::GetGraphOptimizerByEngine(const std::string &engine_name, bool OpsKernelManager::GetEnableFeFlag() const { return enable_fe_flag_; } - bool OpsKernelManager::GetEnableAICPUFlag() const { return enable_aicpu_flag_; } - bool OpsKernelManager::GetEnablePluginFlag() const { return (enable_fe_flag_ || enable_aicpu_flag_); } Status OpsKernelManager::FinalizeOpsKernel() { diff --git a/src/ge/session/inner_session.cc b/src/ge/session/inner_session.cc index 9f90ffd8..4798de90 100644 --- a/src/ge/session/inner_session.cc +++ b/src/ge/session/inner_session.cc @@ -15,11 +15,9 @@ */ #include 
"session/inner_session.h" - #include #include #include - #include "graph/load/new_model_manager/model_manager.h" #include "graph/ge_global_options.h" #include "graph/ge_local_context.h" @@ -41,7 +39,7 @@ Status InnerSession::Initialize() { GELOGW("[InnerSession:%lu] session already initialize.", session_id_); return SUCCESS; } - UpdateThreadContext(); + UpdateThreadContext(std::map{}); GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId())); @@ -51,10 +49,10 @@ Status InnerSession::Initialize() { return ret; } - int32_t version = static_cast(SessionVersion::ClOUD_VERSION); - const int kDefaultDeviceId = 0; - const int kDefaultJobId = 0; - ret = VarManager::Instance(session_id_)->Init(version, session_id_, kDefaultDeviceId, kDefaultJobId); + int32_t version = static_cast(SessionVersion::ClOUD_VERSION); + const int DEFAULT_DEVICE_ID = 0; + const int DEFAULT_JOB_ID = 0; + ret = VarManager::Instance(session_id_)->Init(version, session_id_, DEFAULT_DEVICE_ID, DEFAULT_JOB_ID); if (ret != SUCCESS) { GELOGE(ret, "failed to init session instance"); } @@ -68,7 +66,7 @@ Status InnerSession::Finalize() { GELOGW("[InnerSession:%lu] session does not initialize.", session_id_); return SUCCESS; } - UpdateThreadContext(); + UpdateThreadContext(std::map{}); Status ret = graph_manager_.Finalize(); if (ret != SUCCESS) { // Subsequent code execution is required, so no return is required @@ -80,24 +78,30 @@ Status InnerSession::Finalize() { // release var memory GELOGI("VarManager free var memory."); (void)VarManager::Instance(session_id_)->FreeVarMemory(); - GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); + GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); return ret; } Status InnerSession::GetVariable(const std::string &name, Tensor &val) { - UpdateThreadContext(); + UpdateThreadContext(std::map{}); return graph_manager_.GetVariable(name, val); } Status InnerSession::AddGraph(uint32_t graph_id, const Graph &graph) { + std::map options; + return AddGraph(graph_id, graph, options); +} + +Status InnerSession::AddGraph(uint32_t graph_id, const Graph &graph, + const std::map &options) { std::lock_guard lock(resource_mutex_); if (!init_flag_) { GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_); return GE_SESS_INIT_FAILED; } - UpdateThreadContext(); - Status ret = graph_manager_.AddGraph(graph_id, graph); + UpdateThreadContext(options); + Status ret = graph_manager_.AddGraph(graph_id, graph, options); if (ret != SUCCESS) { GELOGE(ret, "[InnerSession:%lu] add graph %u failed.", session_id_, graph_id); return ret; @@ -115,7 +119,7 @@ Status InnerSession::RunGraph(uint32_t graph_id, const std::vector &inpu GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_); return GE_SESS_INIT_FAILED; } - UpdateThreadContext(); + UpdateThreadContext(graph_id); vector geInputs; for (auto &item : inputs) { geInputs.push_back(TensorAdapter::AsGeTensor(item)); @@ -147,7 +151,7 @@ Status InnerSession::RemoveGraph(uint32_t graph_id) { GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_); return GE_SESS_INIT_FAILED; } - UpdateThreadContext(); + UpdateThreadContext(graph_id); Status ret = graph_manager_.RemoveGraph(graph_id); if (ret != SUCCESS) { GELOGE(ret, "[InnerSession:%lu] remove graph failed, graph_id=%u.", session_id_, graph_id); @@ -159,14 +163,13 @@ Status InnerSession::RemoveGraph(uint32_t graph_id) { } Status InnerSession::RegisterCallBackFunc( - const std::string &key, - const std::function &)> &callback) { + const 
std::string &key, const std::function &)> &callback) { std::lock_guard lock(resource_mutex_); if (!init_flag_) { GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_); return GE_SESS_INIT_FAILED; } - UpdateThreadContext(); + UpdateThreadContext(std::map{}); Status ret = graph_manager_.RegisterCallBackFunc(key, callback); if (ret != SUCCESS) { GELOGE(ret, "[InnerSession:%lu] register %s callback function failed.", session_id_, key.c_str()); @@ -179,7 +182,7 @@ Status InnerSession::RegisterCallBackFunc( Status InnerSession::RunGraphAsync(uint32_t graph_id, const std::vector &inputs, std::vector &outputs, std::function callback) { - UpdateThreadContext(); + UpdateThreadContext(graph_id); GELOGI("[InnerSession:%lu] run graph on session, graph_id=%u.", session_id_, graph_id); Status ret = graph_manager_.RunGraphAsync(graph_id, inputs, outputs, session_id_, callback); if (ret != SUCCESS) { @@ -192,12 +195,24 @@ Status InnerSession::RunGraphAsync(uint32_t graph_id, const std::vector &options) { GetThreadLocalContext().SetGlobalOption(GetMutableGlobalOptions()); GetThreadLocalContext().SetSessionOption(options_); + GetThreadLocalContext().SetGraphOption(options); } + +void InnerSession::UpdateThreadContext(uint32_t graph_id) { + auto options = graph_manager_.GetGraphOptions(graph_id); + if (options == nullptr) { + GELOGW("graph level options is null."); + UpdateThreadContext(std::map{}); + } else { + UpdateThreadContext(*options); + } +} + bool InnerSession::IsGraphNeedRebuild(uint32_t graph_id) { - UpdateThreadContext(); + UpdateThreadContext(graph_id); return graph_manager_.IsGraphNeedRebuild(graph_id); } } // namespace ge diff --git a/src/ge/session/inner_session.h b/src/ge/session/inner_session.h index 38d5a9f6..d79a2eac 100644 --- a/src/ge/session/inner_session.h +++ b/src/ge/session/inner_session.h @@ -20,7 +20,6 @@ #include #include #include - #include "framework/common/ge_types.h" #include "ge/ge_api_types.h" #include "graph/manager/graph_manager.h" @@ -36,6 +35,9 @@ class InnerSession { Status AddGraph(uint32_t graph_id, const Graph &graph); + Status AddGraph(uint32_t graph_id, const Graph &graph, + const std::map &options); /*lint !e148*/ + Status RunGraph(uint32_t graph_id, const std::vector &inputs, std::vector &outputs); Status RemoveGraph(uint32_t graph_id); @@ -48,8 +50,7 @@ class InnerSession { Status GetVariable(const std::string &name, Tensor &val); Status RegisterCallBackFunc( - const std::string &key, - const std::function &)> &callback); + const std::string &key, const std::function &)> &callback); const GraphManager &getGraphManagerObj() const; @@ -61,7 +62,8 @@ class InnerSession { std::map options_; GraphManager graph_manager_; std::mutex resource_mutex_; // AddGraph, RemoveGraph and Finalize use - void UpdateThreadContext(); + void UpdateThreadContext(const std::map &options); + void UpdateThreadContext(uint32_t graph_id); }; } // namespace ge diff --git a/src/ge/session/session_manager.cc b/src/ge/session/session_manager.cc index 5fe28723..0c9685df 100644 --- a/src/ge/session/session_manager.cc +++ b/src/ge/session/session_manager.cc @@ -15,10 +15,8 @@ */ #include "session/session_manager.h" - #include #include - #include "framework/common/debug/ge_log.h" #include "common/ge/ge_util.h" #include "graph/manager/util/rt_context_util.h" @@ -138,6 +136,12 @@ Status SessionManager::GetVariable(SessionId session_id, const std::string &name } Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const Graph &graph) { + std::map 
options; + return AddGraph(session_id, graph_id, graph, options); +} + +Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const Graph &graph, + const std::map &options) { if (!init_flag_) { GELOGE(GE_SESSION_MANAGER_NOT_INIT); return GE_SESSION_MANAGER_NOT_INIT; @@ -207,7 +211,7 @@ bool SessionManager::HasSession(SessionId session_id) { return session_manager_map_.find(session_id) != session_manager_map_.end(); } -Status SessionManager::GetNextSessionId(SessionId &next_session_id) const { +Status SessionManager::GetNextSessionId(SessionId &next_session_id) { if (!init_flag_) { GELOGE(GE_SESSION_MANAGER_NOT_INIT); return GE_SESSION_MANAGER_NOT_INIT; diff --git a/src/ge/session/session_manager.h b/src/ge/session/session_manager.h index 3df3e007..3fd8cf6f 100644 --- a/src/ge/session/session_manager.h +++ b/src/ge/session/session_manager.h @@ -22,7 +22,6 @@ #include #include #include - #include "common/ge_inner_error_codes.h" #include "ge/ge_api_types.h" #include "session/inner_session.h" @@ -64,6 +63,18 @@ class SessionManager { /// /// @ingroup ge_session + /// @brief add a graph to the session with specific session id + /// @param [in] session_id session id + /// @param [in] graph_id graph id + /// @param [in] graph the graph to add + /// @param [in] options graph level options + /// @return Status result of function + /// + Status AddGraph(SessionId session_id, uint32_t graph_id, const Graph &graph, + const std::map &options); + + /// + /// @ingroup ge_session /// @brief run a graph of the session with specific session id /// @param [in] session_id session id /// @param [in] graph_id graph id @@ -114,8 +125,8 @@ class SessionManager { /// @return Status result of function /// Status RegisterCallBackFunc( - SessionId session_id, const std::string &key, - const std::function &)> &callback); + SessionId session_id, const std::string &key, + const std::function &)> &callback); bool IsGraphNeedRebuild(SessionId session_id, uint32_t graph_id); @@ -141,7 +152,7 @@ class SessionManager { bool HasSession(SessionId session_id); - Status GetNextSessionId(SessionId &next_session_id) const; + Status GetNextSessionId(SessionId &next_session_id); std::map session_manager_map_; std::mutex mutex_; diff --git a/src/proto/CMakeLists.txt b/src/proto/CMakeLists.txt deleted file mode 100644 index 16112443..00000000 --- a/src/proto/CMakeLists.txt +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright 2019-2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -cmake_minimum_required(VERSION 2.8) - -project(GeProto[CXX]) - -set(CMAKE_CXX_STANDARD 11) - -# add all proto files, generate corresponding .h and .cc files -file(GLOB_RECURSE PROTO_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "om.proto" - "ge_ir.proto" - "insert_op.proto" - "task.proto" - "fwk_adapter.proto" - "op_mapping_info.proto" - ) - -file(GLOB_RECURSE ONNX_PROTO_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} - "${onnx_INC}/onnx/onnx.proto" - ) - -ge_protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) -ge_protobuf_generate(ge PROTO_ONNX_SRCS PROTO_ONNX_HDRS ${ONNX_PROTO_LIST}) - -# include directories -include_directories(${CMAKE_CURRENT_LIST_DIR}) -include_directories(${GE_SOURCE_DIR}/src) -include_directories(${GE_SOURCE_DIR}/src/common) -include_directories(${GE_SOURCE_DIR}/src/common/graph) -include_directories(${GE_SOURCE_DIR}/inc) -include_directories(${GE_SOURCE_DIR}/inc/external) -include_directories(${GE_SOURCE_DIR}/inc/external/graph) -include_directories(${GE_SOURCE_DIR}/inc/graph) -include_directories(${GE_SOURCE_DIR}/inc/common) -include_directories(${GE_SOURCE_DIR}/third_party/securec/include) -include_directories(${GE_SOURCE_DIR}/third_party/json/include) -include_directories(${GE_SOURCE_DIR}/third_party/protobuf/src) -include_directories(${CMAKE_BINARY_DIR}) -include_directories(${CMAKE_BINARY_DIR}/proto/ge) - -set(CMAKE_CXX_FLAGS "-Wno-unused-variable ${CMAKE_CXX_FLAGS}") - -######### libproto_common.so ############# -add_library(proto_common STATIC ${PROTO_SRCS} ${PROTO_ONNX_SRCS}) -target_compile_definitions(proto_common PRIVATE - DAVINCI_CLOUD) -target_link_libraries(proto_common - ${PROTOBUF_LIBRARY} - ${c_sec} - ${slog} - rt - dl) diff --git a/src/proto/op_mapping_info.proto b/src/proto/op_mapping_info.proto index 9a460907..2bf26f7a 100644 --- a/src/proto/op_mapping_info.proto +++ b/src/proto/op_mapping_info.proto @@ -52,6 +52,15 @@ message OpMappingInfo { oneof model_id_param { uint32 model_id = 3; } - uint32 flag = 4; // 0x01 load, 0x00 unload - repeated Task task = 5; + oneof step_id { + uint64 step_id_addr = 4; + } + oneof iterations_per_loop { + uint64 iterations_per_loop_addr = 5; + } + oneof loop_cond { + uint64 loop_cond_addr = 6; + } + uint32 flag = 7; // 0x01 load, 0x00 unload + repeated Task task = 8; }; \ No newline at end of file diff --git a/tests/ut/common/graph/testcase/ge_graph/ge_operator_unittest.cc b/tests/ut/common/graph/testcase/ge_graph/ge_operator_unittest.cc index f621b848..4f412893 100644 --- a/tests/ut/common/graph/testcase/ge_graph/ge_operator_unittest.cc +++ b/tests/ut/common/graph/testcase/ge_graph/ge_operator_unittest.cc @@ -46,109 +46,6 @@ class UtestGeOperator : public testing::Test { } }; -TEST_F(UtestGeOperator, quant) { - Operator op("quant"); - - UsrQuantizeFactorParams q1; - q1.quantize_algo = USR_HALF_OFFSET_ALGO; - q1.scale_type = USR_SCALAR_SCALE; - - q1.quantize_param.scale_mode = USR_SQRT_MODE; - string s1 = "value1"; - q1.quantize_param.set_scale_value(s1.data(), s1.size()); - q1.quantize_param.scale_offset = 5; - string s2 = "value2"; - q1.quantize_param.set_offset_data_value(s2.data(), s2.size()); - q1.quantize_param.offset_data_offset = 6; - string s3 = "value3"; - q1.quantize_param.set_offset_weight_value(s3.data(), s3.size()); - q1.quantize_param.offset_weight_offset = 7; - string s4 = "value4"; - q1.quantize_param.set_offset_pad_value(s4.data(), s4.size()); - q1.quantize_param.offset_pad_offset = 8; - - q1.dequantize_param.scale_mode 
= USR_SQRT_MODE; - q1.dequantize_param.set_scale_value(s1.data(), s1.size()); - q1.dequantize_param.scale_offset = 15; - q1.dequantize_param.set_offset_data_value(s2.data(), s2.size()); - q1.dequantize_param.offset_data_offset = 16; - q1.dequantize_param.set_offset_weight_value(s3.data(), s3.size()); - q1.dequantize_param.offset_weight_offset = 17; - q1.dequantize_param.set_offset_pad_value(s4.data(), s4.size()); - q1.dequantize_param.offset_pad_offset = 18; - - q1.requantize_param.scale_mode = USR_SQRT_MODE; - q1.requantize_param.set_scale_value(s1.data(), s1.size()); - q1.requantize_param.scale_offset = 25; - q1.requantize_param.set_offset_data_value(s2.data(), s2.size()); - q1.requantize_param.offset_data_offset = 26; - q1.requantize_param.set_offset_weight_value(s3.data(), s3.size()); - q1.requantize_param.offset_weight_offset = 27; - q1.requantize_param.set_offset_pad_value(s4.data(), s4.size()); - q1.requantize_param.offset_pad_offset = 28; - - q1.quantizecalc_param.set_offsetw(s1.data(), s1.size()); - q1.quantizecalc_param.set_offsetd(s2.data(), s2.size()); - q1.quantizecalc_param.set_scalereq(s3.data(), s3.size()); - q1.quantizecalc_param.set_offsetdnext(s4.data(), s4.size()); - q1.quantizecalc_param.offsetw_offset = 34; - q1.quantizecalc_param.offsetd_offset = 35; - q1.quantizecalc_param.scaledreq_offset = 36; - q1.quantizecalc_param.offsetdnext_offset = 37; - - op.SetAttr("quantize_factor", q1); - UsrQuantizeFactorParams q2; - op.GetAttr("quantize_factor", q2); - - EXPECT_EQ(q2.quantize_algo, USR_HALF_OFFSET_ALGO); - EXPECT_EQ(q2.scale_type, USR_SCALAR_SCALE); - - EXPECT_EQ(q2.quantize_param.scale_mode, USR_SQRT_MODE); - EXPECT_EQ(vec2str(q2.quantize_param.scale_value), s1); - - EXPECT_EQ(q2.quantize_param.scale_offset, 5); - EXPECT_EQ(vec2str(q2.quantize_param.offset_data_value), s2); - EXPECT_EQ(q2.quantize_param.offset_data_offset, 6); - EXPECT_EQ(vec2str(q2.quantize_param.offset_weight_value), s3); - EXPECT_EQ(q2.quantize_param.offset_weight_offset, 7); - EXPECT_EQ(vec2str(q2.quantize_param.offset_pad_value), s4); - EXPECT_EQ(q2.quantize_param.offset_pad_offset, 8); - - EXPECT_EQ(q2.dequantize_param.scale_mode, USR_SQRT_MODE); - EXPECT_EQ(vec2str(q2.dequantize_param.scale_value), s1); - EXPECT_EQ(q2.dequantize_param.scale_offset, 15); - EXPECT_EQ(vec2str(q2.dequantize_param.offset_data_value), s2); - EXPECT_EQ(q2.dequantize_param.offset_data_offset, 16); - EXPECT_EQ(vec2str(q2.dequantize_param.offset_weight_value), s3); - EXPECT_EQ(q2.dequantize_param.offset_weight_offset, 17); - EXPECT_EQ(vec2str(q2.dequantize_param.offset_pad_value), s4); - EXPECT_EQ(q2.dequantize_param.offset_pad_offset, 18); - - EXPECT_EQ(q2.requantize_param.scale_mode, USR_SQRT_MODE); - EXPECT_EQ(vec2str(q2.requantize_param.scale_value), s1); - EXPECT_EQ(q2.requantize_param.scale_offset, 25); - EXPECT_EQ(vec2str(q2.requantize_param.offset_data_value), s2); - EXPECT_EQ(q2.requantize_param.offset_data_offset, 26); - EXPECT_EQ(vec2str(q2.requantize_param.offset_weight_value), s3); - EXPECT_EQ(q2.requantize_param.offset_weight_offset, 27); - EXPECT_EQ(vec2str(q2.requantize_param.offset_pad_value), s4); - EXPECT_EQ(q2.requantize_param.offset_pad_offset, 28); - - EXPECT_EQ(vec2str(q2.quantizecalc_param.offsetw), s1); - EXPECT_EQ(vec2str(q2.quantizecalc_param.offsetd), s2); - EXPECT_EQ(vec2str(q2.quantizecalc_param.scalereq), s3); - EXPECT_EQ(vec2str(q2.quantizecalc_param.offsetdnext), s4); - EXPECT_EQ(q2.quantizecalc_param.offsetw_offset, 34); - EXPECT_EQ(q2.quantizecalc_param.offsetd_offset, 35); - 
EXPECT_EQ(q2.quantizecalc_param.scaledreq_offset, 36); - EXPECT_EQ(q2.quantizecalc_param.offsetdnext_offset, 37); - - EXPECT_EQ(QuantizeFactorHasData(q2.quantize_param), true); - EXPECT_EQ(QuantizeFactorHasData(q2.dequantize_param), true); - EXPECT_EQ(QuantizeFactorHasData(q2.requantize_param), true); - EXPECT_EQ(QuantizeFactorHasData(q2.quantizecalc_param), true); -} - TEST_F(UtestGeOperator, try_get_input_desc) { Operator data("data0"); diff --git a/tests/ut/ge/graph/build/logical_stream_allocator_unittest.cc b/tests/ut/ge/graph/build/logical_stream_allocator_unittest.cc index 823b2c60..f9799b49 100644 --- a/tests/ut/ge/graph/build/logical_stream_allocator_unittest.cc +++ b/tests/ut/ge/graph/build/logical_stream_allocator_unittest.cc @@ -857,13 +857,13 @@ TEST_F(UtestLogicalStreamAllocator, test_all_reduce_parallel_pass) { std::map max_parallel_num; LogicalStreamPass::Context context; - context.next_stream = 3; + context.next_stream = 5; context.hcom_parallel = true; vector subgraphs; LogicalStreamPassPtr allreduce_pass = std::make_shared(); ret = allreduce_pass->Run(graph, subgraphs, context); - EXPECT_EQ(ret, SUCCESS); + EXPECT_EQ(ret, NOT_CHANGED); } } // namespace ge diff --git a/tests/ut/ge/graph/build/mem_assigner_unittest.cc b/tests/ut/ge/graph/build/mem_assigner_unittest.cc index 903ac9ad..459d8fc3 100644 --- a/tests/ut/ge/graph/build/mem_assigner_unittest.cc +++ b/tests/ut/ge/graph/build/mem_assigner_unittest.cc @@ -170,7 +170,6 @@ class MockBlockMemAssigner : public BlockMemAssigner { virtual ~MockBlockMemAssigner(){}; Status GetMemoryRanges(std::vector &ranges) override { return FAILED; } - }; } // namespace ge diff --git a/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc index c8c69689..c91ca44b 100644 --- a/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc @@ -54,7 +54,6 @@ #include "new_op_test_utils.h" #include "graph/debug/ge_attr_define.h" - using namespace std; using namespace testing; using domi::EventExDef; diff --git a/tests/ut/ge/graph/passes/base_pass_unittest.cc b/tests/ut/ge/graph/passes/base_pass_unittest.cc index b2767687..858c36dd 100644 --- a/tests/ut/ge/graph/passes/base_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/base_pass_unittest.cc @@ -111,7 +111,6 @@ class UTESTGraphPassesBasePass : public testing::Test { } NamesToPass names_to_pass_; }; - /// reshape1 /// | /// add1 diff --git a/tests/ut/ge/graph/passes/flow_ctrl_pass_unittest.cc b/tests/ut/ge/graph/passes/flow_ctrl_pass_unittest.cc old mode 100644 new mode 100755 diff --git a/tests/ut/ge/graph/passes/folding_kernel/broadcast_gradient_args_kernel_unittest.cc b/tests/ut/ge/graph/passes/folding_kernel/broadcast_gradient_args_kernel_unittest.cc index 904e5cb7..d73771ac 100644 --- a/tests/ut/ge/graph/passes/folding_kernel/broadcast_gradient_args_kernel_unittest.cc +++ b/tests/ut/ge/graph/passes/folding_kernel/broadcast_gradient_args_kernel_unittest.cc @@ -176,8 +176,8 @@ TEST_F(UtestBroadcastGradientArgsKernel, CheckOutputInputsSameEmptyOut) { }); vector> output_shape_dims({ - {}, - {}, + {0}, + {0}, }); vector> output_data({{}, {}}); @@ -219,7 +219,7 @@ TEST_F(UtestBroadcastGradientArgsKernel, CheckOutputInputsBothScalar) { }); vector> output_shape_dims({ - {}, + {0}, {1}, }); vector> output_data({{}, {0}}); diff --git a/tests/ut/ge/graph/passes/folding_kernel/empty_kernel_unittest.cc 
b/tests/ut/ge/graph/passes/folding_kernel/empty_kernel_unittest.cc index bb33582f..91561a5b 100644 --- a/tests/ut/ge/graph/passes/folding_kernel/empty_kernel_unittest.cc +++ b/tests/ut/ge/graph/passes/folding_kernel/empty_kernel_unittest.cc @@ -277,4 +277,4 @@ TEST_F(UtestEmptyKernel, CheckOutputNormalOtherType) { TESTBYTYPE(DT_BOOL, bool) TESTBYTYPE(DT_DOUBLE, double) #undef TESTBYTYPE -} \ No newline at end of file +} diff --git a/tests/ut/ge/graph/passes/folding_kernel/expanddims_kernel_unittest.cc b/tests/ut/ge/graph/passes/folding_kernel/expanddims_kernel_unittest.cc old mode 100644 new mode 100755 diff --git a/tests/ut/ge/graph/passes/folding_kernel/pack_kernel_unittest.cc b/tests/ut/ge/graph/passes/folding_kernel/pack_kernel_unittest.cc index 787e772a..53fabfdd 100644 --- a/tests/ut/ge/graph/passes/folding_kernel/pack_kernel_unittest.cc +++ b/tests/ut/ge/graph/passes/folding_kernel/pack_kernel_unittest.cc @@ -380,4 +380,4 @@ TEST_F(UtestGraphPassesFoldingKernelPackKernel, PackOptimizerFailedErrtype) { shared_ptr kernel = KernelFactory::Instance().Create(PACK); Status status = kernel->Compute(op_desc_ptr, input, outputs); -} \ No newline at end of file +} diff --git a/tests/ut/ge/graph/passes/folding_kernel/size_kernel_unittest.cc b/tests/ut/ge/graph/passes/folding_kernel/size_kernel_unittest.cc index f1a117f2..95644b4d 100644 --- a/tests/ut/ge/graph/passes/folding_kernel/size_kernel_unittest.cc +++ b/tests/ut/ge/graph/passes/folding_kernel/size_kernel_unittest.cc @@ -168,4 +168,4 @@ TEST_F(UtestGraphPassesFoldingKernelSizeKernel, OpdescIsNullptr) { std::vector outputs; Status status = kernel_->Compute(node_, outputs); EXPECT_EQ(NOT_CHANGED, status); -} \ No newline at end of file +} diff --git a/tests/ut/ge/graph/passes/folding_kernel/strided_slice_kernel_unittest.cc b/tests/ut/ge/graph/passes/folding_kernel/strided_slice_kernel_unittest.cc index a2bb92f2..0b16bf97 100644 --- a/tests/ut/ge/graph/passes/folding_kernel/strided_slice_kernel_unittest.cc +++ b/tests/ut/ge/graph/passes/folding_kernel/strided_slice_kernel_unittest.cc @@ -355,6 +355,7 @@ TEST_F(UtestGraphPassesFoldingKernelStridedSliceKernel, Test10) { vector outputs; shared_ptr kernel = KernelFactory::Instance().Create(STRIDEDSLICE); + ge::Status status = kernel->Compute(op_desc_ptr, input, outputs); // EXPECT_EQ(PARAM_INVALID, status); } diff --git a/tests/ut/ge/graph/passes/merge_pass_unittest.cc b/tests/ut/ge/graph/passes/merge_pass_unittest.cc old mode 100644 new mode 100755 diff --git a/tests/ut/ge/graph/passes/print_op_pass_unittest.cc b/tests/ut/ge/graph/passes/print_op_pass_unittest.cc index 65f7d37c..3a75210e 100644 --- a/tests/ut/ge/graph/passes/print_op_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/print_op_pass_unittest.cc @@ -22,7 +22,6 @@ #include "utils/op_desc_utils.h" using domi::GetContext; - namespace ge { class UtestGraphPassesPrintOpPass : public testing::Test { protected: diff --git a/tests/ut/ge/graph/passes/snapshot_pass_unittest.cc b/tests/ut/ge/graph/passes/snapshot_pass_unittest.cc old mode 100644 new mode 100755 diff --git a/tests/ut/ge/graph/passes/update_net_output_pass_unittest.cc b/tests/ut/ge/graph/passes/update_net_output_pass_unittest.cc index 97498616..78186344 100644 --- a/tests/ut/ge/graph/passes/update_net_output_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/update_net_output_pass_unittest.cc @@ -36,7 +36,6 @@ #undef private using namespace testing; - namespace ge { class UtestNodePassesUpdateNetoutputPass : public Test { protected: diff --git 
a/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc b/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc index bc11b4aa..6081ca43 100644 --- a/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc +++ b/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc @@ -63,7 +63,7 @@ TEST_F(UtestGeProfilinganager, init_success) { setenv("PROFILING_MODE", "true", true); Options options; options.device_id = 0; - options.job_id = 0; + options.job_id = "0"; string profiling_config; ProfilingManager::Instance().SetProfilingConfig(profiling_config); diff --git a/tests/ut/ge/single_op/single_op_manager_unittest.cc b/tests/ut/ge/single_op/single_op_manager_unittest.cc old mode 100644 new mode 100755 diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc old mode 100644 new mode 100755 diff --git a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h index caafd600..bb1f63a9 100644 --- a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h +++ b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h @@ -14,17 +14,19 @@ * limitations under the License. */ -#ifndef __AICPU_TASK_STRUCT_H__ -#define __AICPU_TASK_STRUCT_H__ +#ifndef AICPU_TASK_STRUCT_H_ +#define AICPU_TASK_STRUCT_H_ #include namespace aicpu { struct AicpuParamHead { - uint32_t length; // Total length: include cunstom message - uint32_t ioAddrNum; // Input and output address number -} __attribute__((packed)); + uint32_t length; // Total length: includes custom message + uint32_t ioAddrNum; // Input and output address number +} __attribute__ ((packed)); + +} // namespace aicpu + +#endif // AICPU_TASK_STRUCT_H_ -} // end namespace aicpu -#endif //__AICPU_TASK_STRUCT_H__ diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine.h b/third_party/fwkacllib/inc/cce/aicpu_engine.h index f561dc72..740f1200 100644 --- a/third_party/fwkacllib/inc/cce/aicpu_engine.h +++ b/third_party/fwkacllib/inc/cce/aicpu_engine.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __AICPU_ENGINE_H__ -#define __AICPU_ENGINE_H__ +#ifndef AICPU_ENGINE_H__ +#define AICPU_ENGINE_H__ #ifdef __cplusplus extern "C" { @@ -46,4 +46,4 @@ aeStatus_t aeCallInterface(void *addr); } #endif -#endif +#endif // AICPU_ENGINE_H__ diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h index b6dd1127..8e996b9b 100644 --- a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h +++ b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __AICPU_ENGINE_STRUCT_H__ -#define __AICPU_ENGINE_STRUCT_H__ +#ifndef AICPU_ENGINE_STRUCT_H__ +#define AICPU_ENGINE_STRUCT_H__ #include "fwk_adpt_struct.h" @@ -43,4 +43,4 @@ typedef struct { #ifdef __cplusplus } #endif -#endif +#endif // AICPU_ENGINE_STRUCT_H__ diff --git a/third_party/fwkacllib/inc/cce/blas_struct.h b/third_party/fwkacllib/inc/cce/blas_struct.h index b490c30d..e0bcee4c 100644 --- a/third_party/fwkacllib/inc/cce/blas_struct.h +++ b/third_party/fwkacllib/inc/cce/blas_struct.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __CC_BLAS_STRUCT_API__ -#define __CC_BLAS_STRUCT_API__ +#ifndef CC_BLAS_STRUCT_API__ +#define CC_BLAS_STRUCT_API__ #include @@ -28,4 +28,4 @@ typedef enum { typedef enum { CCBLAS_DIAG_NON_UNIT = 0, CCBLAS_DIAG_UNIT = 1 } ccblasDiagType_t; -#endif /*__CC_BLAS_STRUCT_API__*/ +#endif // CC_BLAS_STRUCT_API__ diff --git a/third_party/fwkacllib/inc/cce/cce.h b/third_party/fwkacllib/inc/cce/cce.h index 4953b3ec..0cd9613a 100644 --- a/third_party/fwkacllib/inc/cce/cce.h +++ b/third_party/fwkacllib/inc/cce/cce.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __CCE_H__ -#define __CCE_H__ +#ifndef CCE_H__ +#define CCE_H__ #include #include "cce_def.hpp" @@ -98,4 +98,4 @@ ccStatus_t CceProfilingConfig(const char *target, const char *job_ctx, uint32_t }; // namespace cce -#endif /* __CCE_H__ */ +#endif // CCE_H__ diff --git a/third_party/fwkacllib/inc/cce/cce_def.hpp b/third_party/fwkacllib/inc/cce/cce_def.hpp old mode 100644 new mode 100755 index 4dded1d7..7b1a1b8a --- a/third_party/fwkacllib/inc/cce/cce_def.hpp +++ b/third_party/fwkacllib/inc/cce/cce_def.hpp @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __CCE_DEF_H__ -#define __CCE_DEF_H__ +#ifndef CCE_DEF_H__ +#define CCE_DEF_H__ #include "runtime/rt.h" @@ -149,4 +149,4 @@ ccStatus_t ccSetFuncState(ccFuncParamType_t type, bool isOpen); bool ccGetFuncState(ccFuncParamType_t type); } // namespace cce -#endif +#endif // CCE_DEF_H__ diff --git a/third_party/fwkacllib/inc/cce/common/attr_list.hpp b/third_party/fwkacllib/inc/cce/common/attr_list.hpp old mode 100644 new mode 100755 index b6c68c51..bf48e9fc --- a/third_party/fwkacllib/inc/cce/common/attr_list.hpp +++ b/third_party/fwkacllib/inc/cce/common/attr_list.hpp @@ -14,8 +14,9 @@ * limitations under the License. */ -#ifndef __ATTR_LIST_HPP__ -#define __ATTR_LIST_HPP__ +#ifndef ATTR_LIST_HPP__ +#define ATTR_LIST_HPP__ + #include "catch.hpp" /** @@ -78,4 +79,4 @@ class AttrList { uint32_t initLen_; uint32_t Init(); }; -#endif /* __ATTR_LIST_HPP__ */ +#endif // ATTR_LIST_HPP__ diff --git a/third_party/fwkacllib/inc/cce/common/catch.hpp b/third_party/fwkacllib/inc/cce/common/catch.hpp old mode 100644 new mode 100755 index 28a6dc05..c440be53 --- a/third_party/fwkacllib/inc/cce/common/catch.hpp +++ b/third_party/fwkacllib/inc/cce/common/catch.hpp @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef _CATCH_HPP_ -#define _CATCH_HPP_ +#ifndef CATCH_HPP_ +#define CATCH_HPP_ #include #include @@ -92,4 +92,4 @@ __tabFinal: #define END_FINAL /*GO_ASSERT_FALSE()*/ ; #define GOTO_FINAL() goto __tabFinal; -#endif /* __ATTRINFO_MAP_HPP__ */ +#endif // CATCH_HPP_ diff --git a/third_party/fwkacllib/inc/cce/compiler_stub.h b/third_party/fwkacllib/inc/cce/compiler_stub.h index a380d6b6..00ea467e 100644 --- a/third_party/fwkacllib/inc/cce/compiler_stub.h +++ b/third_party/fwkacllib/inc/cce/compiler_stub.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __COMPILER_STUB_H__ -#define __COMPILER_STUB_H__ +#ifndef COMPILER_STUB_H__ +#define COMPILER_STUB_H__ namespace cce { @@ -33,4 +33,4 @@ bool compilerStubFree(); }; // namespace cce -#endif /* __COMPILER_STUB_H__ */ +#endif // COMPILER_STUB_H__ diff --git a/third_party/fwkacllib/inc/cce/customize.h b/third_party/fwkacllib/inc/cce/customize.h index 6766d987..7dd97af1 100644 --- a/third_party/fwkacllib/inc/cce/customize.h +++ b/third_party/fwkacllib/inc/cce/customize.h @@ -14,8 +14,9 @@ * limitations under the License. 
*/ -#ifndef __CC_CUSTOMIZE_API__ -#define __CC_CUSTOMIZE_API__ +#ifndef CC_CUSTOMIZE_API__ +#define CC_CUSTOMIZE_API__ + #include #define CC_DEVICE_DIM_MAX 8 @@ -55,5 +56,5 @@ typedef void (*aicpu_run_func)(opTensor_t **, void **, int32_t, opTensor_t **, void **, int32_t, void *, rtStream_t); -#endif /*__CC_CUSTOMIZE_API__*/ +#endif // CC_CUSTOMIZE_API__ diff --git a/third_party/fwkacllib/inc/cce/dnn.h b/third_party/fwkacllib/inc/cce/dnn.h index dac0d6c0..03ca7d5a 100644 --- a/third_party/fwkacllib/inc/cce/dnn.h +++ b/third_party/fwkacllib/inc/cce/dnn.h @@ -14,10 +14,10 @@ * limitations under the License. */ -#ifndef __DNN_H__ -#define __DNN_H__ +#ifndef DNN_H__ +#define DNN_H__ #include "cce/dnn_base.h" #include "cce/dnn_op.h" -#endif /* __DNN_H__ */ +#endif // DNN_H__ diff --git a/third_party/fwkacllib/inc/cce/dnn_base.h b/third_party/fwkacllib/inc/cce/dnn_base.h index 9e9134d9..912ba671 100644 --- a/third_party/fwkacllib/inc/cce/dnn_base.h +++ b/third_party/fwkacllib/inc/cce/dnn_base.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __DNN_BASE_H__ -#define __DNN_BASE_H__ +#ifndef DNN_BASE_H__ +#define DNN_BASE_H__ #include "cce/blas_struct.h" #include "cce/customize.h" @@ -26,7 +26,7 @@ namespace cce { * @ingroup dnn * @brief Minimum epsilon allowed to be used in the Batch Normalization formula */ -#define CC_BN_MIN_EPSILON +#define CC_BN_MIN_EPSILON (1e-7) #ifndef NULL #ifdef __cplusplus @@ -673,4 +673,4 @@ ccStatus_t ccSetQuantizeAlgoAndScaleType(ccQuantizeDescriptor_t quantizeInfo, }; /* end cce */ -#endif /* __DNN_BASE_H__ */ +#endif // DNN_BASE_H__ diff --git a/third_party/fwkacllib/inc/cce/dnn_base_def.hpp b/third_party/fwkacllib/inc/cce/dnn_base_def.hpp old mode 100644 new mode 100755 index abe0faa4..8ce5e933 --- a/third_party/fwkacllib/inc/cce/dnn_base_def.hpp +++ b/third_party/fwkacllib/inc/cce/dnn_base_def.hpp @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __DNN_BASE_HPP__ -#define __DNN_BASE_HPP__ +#ifndef DNN_BASE_HPP__ +#define DNN_BASE_HPP__ #include "cce/cce_def.hpp" @@ -603,7 +603,6 @@ typedef union tagCcActivationPara { */ typedef enum tagCcSquareMode { CC_SQUARE_2 = 0, /* square */ - // CC_SQUARE_3, /* cube */ } ccSquareMode_t; /** @@ -755,22 +754,10 @@ typedef struct tagCcQuantize { ccConvolutionScaleType_t scaleWType; // show scaleRq,scaleDq type - // The quantization parameter is used when input is FP16. - // scalar - // scaleValueMode=SCALE_NORMAL, = 1/scale_d - // scaleValueMode=SCALE_SQRT, = (1/scale_d)^1/2 CcQuantizePara_t scaleQ; - // The requantized parameter is used when output is int8. - // When scaleWType=SCALE_VEC, the dimension of the vector corresponds to the N dimension of filer; SCALE_SCALAR is - // scalars. scaleValueMode=SCALE_NORMAL, = scale_d*scale_w[n]/scale_d_next*1/DEQSCALE, DEQSCALE=2^-17 - // scaleValueMode=SCALE_SQRT, = (scale_d*scale_w[n]/scale_d_next*1/DEQSCALE)^1/2, DEQSCALE=2^-17 CcQuantizePara_t scaleRq; - // The dequantization parameter is used when output is fp16. - // When scaleWType=SCALE_VEC, the dimension of the vector corresponds to the N dimension of filer; SCALE_SCALAR is - // scalars. 
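// ---------------------------------------------------------------------------
// Editor's note (not part of the patch): the dnn_base.h hunk above gives
// CC_BN_MIN_EPSILON a concrete value (1e-7) instead of an empty define. A
// minimal sketch of how such a floor is typically applied in the
// batch-normalization formula y = (x - mean) / sqrt(var + eps); the helper
// below is illustrative, not a cce API.
#include <algorithm>
#include <cmath>

#ifndef CC_BN_MIN_EPSILON
#define CC_BN_MIN_EPSILON (1e-7)
#endif

inline float BatchNormNormalize(float x, float mean, float var, float eps) {
  eps = std::max(eps, static_cast<float>(CC_BN_MIN_EPSILON));  // enforce the documented minimum
  return (x - mean) / std::sqrt(var + eps);
}
// ---------------------------------------------------------------------------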
scaleValueMode=SCALE_NORMAL, = scale_d*scale_w[n]*1/DEQSCALE, DEQSCALE=2^-17 scaleValueMode=SCALE_SQRT, = - (scale_d*scale_w[n]*1/DEQSCALE)^1/2, DEQSCALE=2^-17 CcQuantizePara_t scaleDq; // need relu @@ -1004,4 +991,4 @@ typedef struct tagCcArgmaxmin *ccArgmaxminDescriptor_t; }; // namespace cce -#endif /* __DNN_BASE_HPP__ */ +#endif // DNN_BASE_HPP__ diff --git a/third_party/fwkacllib/inc/cce/dnn_op.h b/third_party/fwkacllib/inc/cce/dnn_op.h index 305199af..7895066b 100644 --- a/third_party/fwkacllib/inc/cce/dnn_op.h +++ b/third_party/fwkacllib/inc/cce/dnn_op.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __DNN_OP_H__ -#define __DNN_OP_H__ +#ifndef DNN_OP_H__ +#define DNN_OP_H__ #include "cce/blas_struct.h" #include "cce/cce.h" @@ -3461,7 +3461,7 @@ ccStatus_t ccEmbeddingLookupForward(ccHandle_t handle, const void *alpha, const /** * @ingroup - * @brief embedding lookup forward. + * @brief Embedding lookup forward, for nn.embedding_lookup. * @param [in] handle cce handle * @param [in] alpha common scale factor * @param [in] inputNum inputNum @@ -4835,4 +4835,4 @@ ccStatus_t ccIsFinite(ccHandle_t handle, const void *alpha, const ccTensorDescri const ccTensorDescriptor_t yDesc, const void *y, const void *beta); }; // namespace cce -#endif /* __DNN_OP_H__ */ +#endif // DNN_OP_H__ diff --git a/third_party/fwkacllib/inc/cce/dnn_struct.hpp b/third_party/fwkacllib/inc/cce/dnn_struct.hpp old mode 100644 new mode 100755 index 8b9cc097..96566074 --- a/third_party/fwkacllib/inc/cce/dnn_struct.hpp +++ b/third_party/fwkacllib/inc/cce/dnn_struct.hpp @@ -14,10 +14,10 @@ * limitations under the License. */ -#ifndef __DNN_STRUCT_HPP__ -#define __DNN_STRUCT_HPP__ +#ifndef DNN_STRUCT_HPP__ +#define DNN_STRUCT_HPP__ #include "dnn.h" #include "dnn_struct_base.hpp" -#endif /* __DNN_STRUCT_HPP__ */ +#endif // DNN_STRUCT_HPP__ diff --git a/third_party/fwkacllib/inc/cce/dnn_struct_base.hpp b/third_party/fwkacllib/inc/cce/dnn_struct_base.hpp old mode 100644 new mode 100755 index 672cf77f..dd75e9ea --- a/third_party/fwkacllib/inc/cce/dnn_struct_base.hpp +++ b/third_party/fwkacllib/inc/cce/dnn_struct_base.hpp @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __DNN_STRUCT_BASE_HPP__ -#define __DNN_STRUCT_BASE_HPP__ +#ifndef DNN_STRUCT_BASE_HPP__ +#define DNN_STRUCT_BASE_HPP__ #include "cce/cce_def.hpp" @@ -891,4 +891,4 @@ typedef struct tagCcConcatFive2Four_t { } ccConcatFive2Four_t; }; // namespace cce -#endif /* __DNN_STRUCT_BASE_HPP__ */ +#endif // DNN_STRUCT_BASE_HPP__ diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h index 91666607..ff0f10e3 100644 --- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h +++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __FWK_ADPT_STRUCT_H__ -#define __FWK_ADPT_STRUCT_H__ +#ifndef FWK_ADPT_STRUCT_H__ +#define FWK_ADPT_STRUCT_H__ #include @@ -78,4 +78,4 @@ struct ResultSummary { } // end namespace FWKAdapter } // namespace aicpu -#endif //__FWK_ADPT_STRUCT_H__ +#endif // FWK_ADPT_STRUCT_H__ diff --git a/third_party/fwkacllib/inc/cce/l2fusion_struct.hpp b/third_party/fwkacllib/inc/cce/l2fusion_struct.hpp old mode 100644 new mode 100755 index 319d65b9..fa5a95c9 --- a/third_party/fwkacllib/inc/cce/l2fusion_struct.hpp +++ b/third_party/fwkacllib/inc/cce/l2fusion_struct.hpp @@ -14,8 +14,8 @@ * limitations under the License.
*/ -#ifndef _L2FUSION_STRUCT_HPP_ -#define _L2FUSION_STRUCT_HPP_ +#ifndef L2FUSION_STRUCT_HPP_ +#define L2FUSION_STRUCT_HPP_ #include #include @@ -53,4 +53,4 @@ typedef std::pair TaskL2InfoFEPair_t; // the key is nodeN } // namespace fusion -#endif +#endif // L2FUSION_STRUCT_HPP_ diff --git a/third_party/fwkacllib/inc/cce/optimizer/fusion_engine.h b/third_party/fwkacllib/inc/cce/optimizer/fusion_engine.h index 4e38c9eb..299998e3 100644 --- a/third_party/fwkacllib/inc/cce/optimizer/fusion_engine.h +++ b/third_party/fwkacllib/inc/cce/optimizer/fusion_engine.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef _FUSION_ENGINE_HPP_ -#define _FUSION_ENGINE_HPP_ +#ifndef FUSION_ENGINE_HPP_ +#define FUSION_ENGINE_HPP_ #include "cce/cce.h" #include "graph/compute_graph.h" @@ -62,4 +62,4 @@ void FusionTaskBuildComplete(std::vector cchandleList); uint32_t GraphFusionTrain(ge::ComputeGraphPtr origGraph, ge::ComputeGraphPtr fusionGraph); } // namespace fusion -#endif +#endif // FUSION_ENGINE_HPP_ diff --git a/third_party/fwkacllib/inc/cce/taskdown_api.h b/third_party/fwkacllib/inc/cce/taskdown_api.h index ef449bb2..db06350c 100644 --- a/third_party/fwkacllib/inc/cce/taskdown_api.h +++ b/third_party/fwkacllib/inc/cce/taskdown_api.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef _TASKDOWN_API_H_ -#define _TASKDOWN_API_H_ +#ifndef TASKDOWN_API_H_ +#define TASKDOWN_API_H_ #include #include @@ -34,6 +34,7 @@ typedef struct tagOpAddrsInfo { ccStatus_t ccUpdateKernelArgs(ccOpContext &opContext, uint64_t dataBaseAddr, uint64_t weightBaseAddr, uint64_t variableBaseAddr, void *argsAddr, uint64_t argsSize, void *l2ctrlAddr); + ccStatus_t ccGetKernelArgsAddrs(ccOpContext &opContext, void *argsAddr, uint64_t argsSize, void *l2ctrlAddr, std::vector &opAddrsInfo); @@ -42,4 +43,4 @@ ccStatus_t ccSetKernelArgs(std::vector &dateInfo); ccStatus_t ccGetKernelTypeByOpId(uint32_t opId, ccKernelType &kernelType); } // namespace cce -#endif +#endif // TASKDOWN_API_H_ diff --git a/third_party/fwkacllib/inc/cce/taskdown_common.hpp b/third_party/fwkacllib/inc/cce/taskdown_common.hpp old mode 100644 new mode 100755 index 3993e50f..51a8ba11 --- a/third_party/fwkacllib/inc/cce/taskdown_common.hpp +++ b/third_party/fwkacllib/inc/cce/taskdown_common.hpp @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef _TASKDOWN_COMMON_H_ -#define _TASKDOWN_COMMON_H_ +#ifndef TASKDOWN_COMMON_H_ +#define TASKDOWN_COMMON_H_ #include #include "cce/cce_def.hpp" @@ -103,4 +103,4 @@ ccStatus_t ccClearStreamL2Map(ccHandle_t handle); ccStatus_t ccGetKernelReadCount(rtStream_t streamId, ccOpReadCount &rc); } // namespace cce -#endif +#endif // TASKDOWN_COMMON_H_ diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h index 2777fa23..c2d22630 100644 --- a/third_party/fwkacllib/inc/hccl/base.h +++ b/third_party/fwkacllib/inc/hccl/base.h @@ -14,93 +14,110 @@ * limitations under the License. 
*/ -#ifndef __HCCL_BASE_H__ -#define __HCCL_BASE_H__ +/** + * @file base.h + * @brief HCOM data type definition + * + */ + +#ifndef HCCL_BASE_H_ +#define HCCL_BASE_H_ #ifdef __cplusplus extern "C" { -#endif +#endif // __cplusplus typedef signed char s8; typedef signed short s16; typedef signed int s32; typedef signed long long s64; - typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; typedef unsigned long long u64; +/** + * @brief HCOM functions return value definition + */ typedef enum tagHcclResult { - HCCL_SUCCESS = 0, /**< success */ - HCCL_E_PARA = 1, /**< parameter error */ - HCCL_E_PTR = 2, /**< empty pointer */ - HCCL_E_MEMORY = 3, /**< memory error */ - HCCL_E_INTERNAL = 4, /**< internal error */ - HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ - HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ - HCCL_E_UNAVAIL = 7, /**< resource unavailable */ - HCCL_E_SYSCALL = 8, /**< call system interface error */ - HCCL_E_TIMEOUT = 9, /**< timeout */ - HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ - HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ - HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ - HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ - HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ - HCCL_E_RUNTIME = 15, /**< call runtime api fail */ - HCCL_E_DRV = 16, /**< call driver api fail */ - HCCL_E_PROFILING = 17, /**< call profiling api fail */ - HCCL_E_CCE = 18, /**< call cce api fail */ - HCCL_E_NETWORK = 19, /**< call network api fail */ - HCCL_E_RESERVED /**< reserved */ + HCCL_SUCCESS = 0, /**< success */ + HCCL_E_PARA = 1, /**< parameter error */ + HCCL_E_PTR = 2, /**< empty pointer */ + HCCL_E_MEMORY = 3, /**< memory error */ + HCCL_E_INTERNAL = 4, /**< internal error */ + HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ + HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ + HCCL_E_UNAVAIL = 7, /**< resource unavailable */ + HCCL_E_SYSCALL = 8, /**< call system interface error */ + HCCL_E_TIMEOUT = 9, /**< timeout */ + HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ + HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ + HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ + HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ + HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ + HCCL_E_RUNTIME = 15, /**< call runtime api fail */ + HCCL_E_DRV = 16, /**< call driver api fail */ + HCCL_E_PROFILING = 17, /**< call profiling api fail */ + HCCL_E_CCE = 18, /**< call cce api fail */ + HCCL_E_NETWORK = 19, /**< call network api fail */ + HCCL_E_RESERVED /**< reserved */ } hcclResult_t; /* handle to communicator */ typedef void *hcclComm_t; +/** + * @brief HCCL Reduction operation + */ typedef enum tagHcclRedOp { - HCCL_REP_OP_SUM = 0, /**< sum */ - HCCL_REP_OP_PROD = 1, /**< prod */ - HCCL_REP_OP_MAX = 2, /**< max */ - HCCL_REP_OP_MIN = 3, /**< min */ - HCCL_REP_OP_RESERVED /**< reserved */ + HCCL_REP_OP_SUM = 0, /**< sum */ + HCCL_REP_OP_PROD = 1, /**< prod */ + HCCL_REP_OP_MAX = 2, /**< max */ + HCCL_REP_OP_MIN = 3, /**< min */ + HCCL_REP_OP_RESERVED /**< reserved */ } hcclRedOp_t; +/** + * @brief HCCL data type + */ typedef enum tagHcclDataType { - HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ - HCCL_DATA_TYPE_INT = 1, /**< int32 */ - HCCL_DATA_TYPE_HALF = 2, /**< fp16 */ - HCCL_DATA_TYPE_FLOAT = 3, /**< fp32 */ - HCCL_DATA_TYPE_RESERVED /**< reserved */ + HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ + HCCL_DATA_TYPE_INT = 1, /**< int32 */ + HCCL_DATA_TYPE_HALF = 2, /**< fp16 */ + HCCL_DATA_TYPE_FLOAT = 3, /**< fp32
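// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the patch): every HCOM entry point
// reports one of the hcclResult_t codes enumerated above. A typical
// check-and-propagate wrapper; the macro is a local helper, not an HCCL API.
#include <cstdio>

#define HCCL_CHECK(call)                                          \
  do {                                                            \
    hcclResult_t rc_ = (call);                                    \
    if (rc_ != HCCL_SUCCESS) {                                    \
      std::fprintf(stderr, "%s failed: %d\n", #call, (int)rc_);   \
      return rc_;                                                 \
    }                                                             \
  } while (0)
// ---------------------------------------------------------------------------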
*/ + HCCL_DATA_TYPE_RESERVED /**< reserved */ } hcclDataType_t; const s32 HCCL_TAG_ANY = -1; const u32 BASE_UNIQUE_ID_BYTES = 27; #define HCCL_UNIQUE_ID_BYTES (BASE_UNIQUE_ID_BYTES + 5 + 16 + 128) typedef struct { - char internal[HCCL_UNIQUE_ID_BYTES]; + char internal[HCCL_UNIQUE_ID_BYTES]; } hcclUniqueId; -const u32 HCCL_MAX_SEGMENT_NUM = 8; +const u32 HCCL_MAX_SEGMENT_NUM = 8; // The max number of gradient segments. +/** + * @brief The feature of the model + */ struct model_feature { - const char *model_name; - u32 gradient_num; - float *gradient_size; - float *gradient_time; + const char *model_name; /**< The model name */ + u32 gradient_num; /**< The number of gradients */ + float *gradient_size; /**< The size of each gradient */ + float *gradient_time; /**< The BP computation time of each gradient */ }; /** - * @brief stream handle. - */ +* @brief stream handle. +*/ typedef void *rtStream_t; /** - * @brief model handle. - */ +* @brief model handle. +*/ typedef void *rtModel_t; + #ifdef __cplusplus } -#endif - -#endif // __HCCL_BASE_H__ +#endif // __cplusplus +#endif // HCCL_BASE_H_ diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h index 323faf09..8ac2b4bc 100644 --- a/third_party/fwkacllib/inc/hccl/hcom.h +++ b/third_party/fwkacllib/inc/hccl/hcom.h @@ -14,66 +14,262 @@ * limitations under the License. */ -#ifndef __HCOM_H__ -#define __HCOM_H__ +/** + * @file hcom.h + * @brief HCOM API + */ -#include +#ifndef HCOM_H_ +#define HCOM_H_ +#include #include #ifdef __cplusplus extern "C" { -#endif +#endif // __cplusplus +/** + * @brief Initialize HCOM. + * + * @param rank_table A string identifying the rank table file path, including the file name. + * @param identify A string identifying the identity of the rank. + * @return hcclResult_t + * @see hcom_destroy() + */ extern hcclResult_t hcom_init(const char *rank_table, const char *identify); +/** + * @brief Destroy HCOM. + * + * @return hcclResult_t + * @see hcom_init() + */ extern hcclResult_t hcom_destroy(void); +/** + * @brief Bind the model. + * + * @param model A pointer identifying the model information. + * @param stream A pointer identifying the stream information. + * @return hcclResult_t + * @see hcom_unbind_model() + */ extern hcclResult_t hcom_bind_model(rtModel_t model, rtStream_t stream); +/** + * @brief Unbind the model. + * + * @param model A pointer identifying the model information. + * @return hcclResult_t + * @see hcom_bind_model() + */ extern hcclResult_t hcom_unbind_model(rtModel_t model); +/** + * @brief All-gather operator. + * + * @param tag A string identifying the tag of the operator. + * @param inputPtr A pointer identifying the input data address of the operator. + * @param outputPtr A pointer identifying the output data address of the operator. + * @param inputCount An integer(u64) identifying the number of the input data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param group A string identifying the group name of ranks participating in the operator. + * @param stream A pointer identifying the stream information. + * @return hcclResult_t + */ extern hcclResult_t hcom_all_gather(const char *tag, void *inputPtr, void *outputPtr, u64 inputCount, hcclDataType_t dataType, const char *group, rtStream_t stream); +/** + * @brief All-reduce operator. + * + * @param tag A string identifying the tag of the operator. + * @param inputPtr A pointer identifying the input data address of the operator.
+ * @param outputPtr A pointer identifying the output data address of the operator. + * @param count An integer(u64) identifying the number of the output data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param group A string identifying the group name of ranks participating in the operator. + * @param stream A pointer identifying the stream information. + * @return hcclResult_t + */ extern hcclResult_t hcom_all_reduce(const char *tag, void *inputPtr, void *outputPtr, u64 count, hcclDataType_t dataType, hcclRedOp_t op, const char *group, rtStream_t stream); +/** + * @brief Broadcast operator. + * + * @param tag A string identifying the tag of the operator. + * @param ptr A pointer identifying the data address of the operator. + * @param count An integer(u64) identifying the number of the data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param root An integer(u32) identifying the root rank in the operator. + * @param group A string identifying the group name of ranks participating in the operator. + * @param stream A pointer identifying the stream information. + * @return hcclResult_t + */ extern hcclResult_t hcom_broadcast(const char *tag, void *ptr, u64 count, hcclDataType_t dataType, u32 root, const char *group, rtStream_t stream); +/** + * @brief Reduce-scatter operator. + * + * @param tag A string identifying the tag of the operator. + * @param inputPtr A pointer identifying the input data address of the operator. + * @param outputPtr A pointer identifying the output data address of the operator. + * @param count An integer(u64) identifying the number of the data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param group A string identifying the group name of ranks participating in the operator. + * @param stream A pointer identifying the stream information. + * @return hcclResult_t + */ extern hcclResult_t hcom_reduce_scatter(const char *tag, void *inputPtr, void *outputPtr, u64 count, hcclDataType_t dataType, hcclRedOp_t op, const char *group, rtStream_t stream); +/** + * @brief Get the rank number in the group. + * + * @param group A string identifying the group name. + * @param rankSize A pointer identifying the rank number. + * @return hcclResult_t + */ hcclResult_t hcom_get_rank_size(const char *group, u32 *rankSize); +/** + * @brief Get the rank number of this rank's server within the group. + * + * @param group A string identifying the group name. + * @param localRankSize A pointer identifying the rank number. + * @return hcclResult_t + */ hcclResult_t hcom_get_local_rank_size(const char *group, u32 *localRankSize); +/** + * @brief Get the rank id of this rank. + * + * @param group A string identifying the group name. + * @param rankId A pointer identifying the rank id. + * @return hcclResult_t + */ hcclResult_t hcom_get_rank_id(const char *group, u32 *rankId); +/** + * @brief Get the local rank id of this rank's server within the group. + * + * @param group A string identifying the group name. + * @param localRankId A pointer identifying the local rank id.
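// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the patch): the documented
// signatures put together -- initialize HCOM, query this rank, run a summing
// all-reduce, then tear down. The rank-table path, tag, group name and device
// buffers are hypothetical placeholders.
hcclResult_t RunAllReduce(void *input_dev, void *output_dev, u64 count, rtStream_t stream) {
  hcclResult_t rc = hcom_init("/path/to/rank_table.json", "0");  // placeholder arguments
  if (rc != HCCL_SUCCESS) return rc;
  u32 rank_id = 0;
  rc = hcom_get_rank_id("hccl_world_group", &rank_id);  // default group name is an assumption
  if (rc == HCCL_SUCCESS) {
    rc = hcom_all_reduce("allreduce_demo", input_dev, output_dev, count,
                         HCCL_DATA_TYPE_FLOAT, HCCL_REP_OP_SUM, "hccl_world_group", stream);
  }
  (void)hcom_destroy();
  return rc;
}
// ---------------------------------------------------------------------------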
+ * @return hcclResult_t + */ hcclResult_t hcom_get_local_rank_id(const char *group, u32 *localRankId); +/** + * @brief Get the world rank id according to the group rank id. + * + * @param group A string identifying the group name. + * @param groupRank An integer(u32) identifying the group rank id. + * @param worldRank A pointer identifying the world rank id. + * @return hcclResult_t + */ hcclResult_t hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); +/** + * @brief Get the group rank id according to the world rank id. + * + * @param worldRank An integer(u32) identifying the world rank id. + * @param group A string identifying the group name. + * @param groupRank A pointer identifying the group rank id. + * @return hcclResult_t + */ hcclResult_t hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank); +/** + * @brief Create group. + * + * @param group A string identifying the group name. + * @param rankNum An integer(u32) identifying the number of ranks in the group. + * @param rankIds A list identifying the ranks in the group. + * @return hcclResult_t + */ hcclResult_t hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); +/** + * @brief Destroy group. + * + * @param group A string identifying the group name. + * @return hcclResult_t + */ hcclResult_t hcom_destroy_group(const char *group); -hcclResult_t hcom_send(const char *tag, void *inputPtr, u64 count, hcclDataType_t dataType, u32 destRank, u32 srTag, - const char *group, rtStream_t stream); +/** + * @brief Send operator. + * + * @param tag A string identifying the tag of the operator. + * @param inputPtr A pointer identifying the input data address of the operator. + * @param count An integer(u64) identifying the number of the data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param destRank An integer identifying the destination rank. + * @param srTag An integer identifying the send/recv message tag. + * The message will be received by the receive operator with the same "sr_tag". + * @param group A string identifying the group name of ranks participating in the operator. + * @param stream A pointer identifying the stream information. + * @return hcclResult_t + */ +hcclResult_t hcom_send(const char *tag, void *inputPtr, u64 count, hcclDataType_t dataType, + u32 destRank, u32 srTag, const char *group, rtStream_t stream); -hcclResult_t hcom_receive(const char *tag, void *outputPtr, u64 count, hcclDataType_t dataType, u32 srcRank, u32 srTag, - const char *group, rtStream_t stream); +/** + * @brief Receive operator. + * + * @param tag A string identifying the tag of the operator. + * @param outputPtr A pointer identifying the output data address of the operator. + * @param count An integer(u64) identifying the number of the data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param srcRank An integer identifying the source rank. + * @param srTag An integer identifying the send/recv message tag. + * The message will be sent by the send operator with the same "sr_tag". + * @param group A string identifying the group name of ranks participating in the operator. + * @param stream A pointer identifying the stream information.
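// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the patch): per the docs above, a
// send is matched with a receive through the same "srTag". Ranks, tags, group
// name and buffers are hypothetical placeholders.
void SendRecvPair(void *send_buf, void *recv_buf, u64 count, rtStream_t stream) {
  const u32 dest_rank = 1, src_rank = 1, sr_tag = 7;  // placeholder ranks and shared tag
  (void)hcom_send("send_demo", send_buf, count, HCCL_DATA_TYPE_FLOAT,
                  dest_rank, sr_tag, "hccl_world_group", stream);
  (void)hcom_receive("recv_demo", recv_buf, count, HCCL_DATA_TYPE_FLOAT,
                     src_rank, sr_tag, "hccl_world_group", stream);
}
// ---------------------------------------------------------------------------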
+ * @return hcclResult_t + */ +hcclResult_t hcom_receive(const char *tag, void *outputPtr, u64 count, hcclDataType_t dataType, + u32 srcRank, u32 srTag, const char *group, rtStream_t stream); -hcclResult_t hcom_get_split_strategy(const char *group, const struct model_feature *feature, u32 maxSegmentNum, - u32 *segmentNum, u32 *segmentIdx); +/** + * @brief Get the gradient split strategy within the group. + * + * @param group A string identifying the group name. + * @param feature A pointer identifying the feature of the model. + * @param maxSegmentNum An integer(u32) identifying the max segments of gradients. + * @param segmentNum A pointer identifying the segments number of gradients. + * @param segmentIdx A list identifying the index of end gradient in each segment. + * @return hcclResult_t + */ +hcclResult_t hcom_get_split_strategy(const char *group, const struct model_feature *feature, + u32 maxSegmentNum, u32 *segmentNum, u32 *segmentIdx); +/** + * @brief Set the gradient split strategy within the group, according to gradient index. + * + * @param group A string identifying the group name. + * @param segmentNum An integer(u32) identifying the segments number of gradients. + * @param IdxList A list identifying the index of end gradient in each segment. + * @return hcclResult_t + */ extern hcclResult_t hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); +/** + * @brief Set the gradient split strategy within the group, according to gradient data size. + * + * @param group A string identifying the group name. + * @param segmentNum An integer(u32) identifying the segments number of gradients. + * @param sizeList A list identifying the percent of each segment. + * @return hcclResult_t + */ extern hcclResult_t hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); + #ifdef __cplusplus } -#endif -#endif // __HCOM_H__ +#endif // __cplusplus +#endif // HCOM_H_ diff --git a/third_party/fwkacllib/inc/mmpa/mmpa_api.h b/third_party/fwkacllib/inc/mmpa/mmpa_api.h index 01240198..ce1c9720 100644 --- a/third_party/fwkacllib/inc/mmpa/mmpa_api.h +++ b/third_party/fwkacllib/inc/mmpa/mmpa_api.h @@ -20,7 +20,7 @@ #define LINUX 0 #define WIN 1 -#if(OS_TYPE == LINUX) +#if(OS_TYPE == LINUX) //lint !e553 #ifndef _GNU_SOURCE #define _GNU_SOURCE @@ -84,7 +84,7 @@ #endif -#if(OS_TYPE == WIN) +#if(OS_TYPE == WIN) //lint !e553 #include #include #include "Windows.h" @@ -121,5 +121,5 @@ #pragma comment(lib, "Iphlpapi.lib") #endif -#endif /* _MMPA_API_H_ */ +#endif // MMPA_API_H_ diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index 5ed1811c..ce83d143 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -20,8 +20,8 @@ #ifdef __cplusplus #if __cplusplus extern "C" { -#endif /* __cpluscplus */ -#endif /* __cpluscplus */ +#endif // __cplusplus +#endif // __cplusplus #define MMPA_MACINFO_DEFAULT_SIZE 18 #define MMPA_CPUDESC_DEFAULT_SIZE 64 @@ -46,8 +46,8 @@ typedef int (*mmSort)(const mmDirent **a, const mmDirent **b); typedef VOID *(*userProcFunc)(VOID *pulArg); typedef struct { - userProcFunc procFunc; - VOID *pulArg; + userProcFunc procFunc; // Callback function pointer + VOID *pulArg; // Callback function parameters } mmUserBlock_t; typedef struct { @@ -102,28 +102,28 @@ typedef enum { } mmPollType; typedef struct { - mmPollHandle handle; - mmPollType pollType; - - INT32 ioctlCode; - mmComPletionKey
completionKey; - + mmPollHandle handle; // The file descriptor or handle to poll + mmPollType pollType; // Operation type requiring poll: + // read, recv or ioctl + INT32 ioctlCode; // IOCTL operation code, dedicated to IOCTL + mmComPletionKey completionKey; // Defaults to blank; used on Windows + // to tell which handle has readable data } mmPollfd; typedef struct { - VOID *priv; - mmPollHandle bufHandle; - mmPollType bufType; - VOID *buf; - UINT32 bufLen; - UINT32 bufRes; + VOID *priv; // User defined private content + mmPollHandle bufHandle; // Value of handle corresponding to buf + mmPollType bufType; // Type of data being polled + VOID *buf; // Data used in poll + UINT32 bufLen; // Data length used in poll + UINT32 bufRes; // Actual return length } mmPollData, *pmmPollData; typedef VOID (*mmPollBack)(pmmPollData); typedef struct { - INT32 tz_minuteswest; - INT32 tz_dsttime; // type of DST correction + INT32 tz_minuteswest; // Minutes west of Greenwich + INT32 tz_dsttime; // type of DST correction } mmTimezone; typedef struct { @@ -174,14 +174,16 @@ typedef struct { typedef mode_t MODE; typedef struct { - INT32 detachFlag; - INT32 priorityFlag; - INT32 priority; - INT32 policyFlag; - INT32 policy; - - INT32 stackFlag; - UINT32 stackSize; + INT32 detachFlag; // Whether to detach the thread: 0 do not detach, 1 detach + INT32 priorityFlag; // Whether to set the priority: 0 do not set, 1 set + INT32 priority; // Priority value to be set, valid range 1-99 + INT32 policyFlag; // Whether to set the scheduling policy: 0 do not set, 1 set + INT32 policy; // Scheduling policy value: + // MMPA_THREAD_SCHED_RR + // MMPA_THREAD_SCHED_OTHER + // MMPA_THREAD_SCHED_FIFO + INT32 stackFlag; // Whether to set the stack size: 0 do not set, 1 set + UINT32 stackSize; // Stack size to be set, in bytes; must not be less than MMPA_THREAD_STACK_MIN } mmThreadAttr; #ifdef __ANDROID__ @@ -210,9 +212,10 @@ typedef struct { #define M_MSG_EXCL (IPC_CREAT | IPC_EXCL) #define M_MSG_NOWAIT IPC_NOWAIT -#define M_WAIT_NOHANG WNOHANG -#define M_WAIT_UNTRACED WUNTRACED - +#define M_WAIT_NOHANG WNOHANG // Non-blocking wait +#define M_WAIT_UNTRACED \ + WUNTRACED // Also return immediately if the subprocess enters the suspended state; + // the end state of the subprocess is ignored #define M_UMASK_USRREAD S_IRUSR #define M_UMASK_GRPREAD S_IRGRP #define M_UMASK_OTHREAD S_IROTH @@ -319,6 +322,7 @@ extern mmAtomicType mmValueInc(mmAtomicType *ptr, mmAtomicType value); extern mmAtomicType mmValueSub(mmAtomicType *ptr, mmAtomicType value); extern INT32 mmCreateTaskWithDetach(mmThread *threadHandle, mmUserBlock_t *funcBlock); +// The following 3 interfaces are to be deleted extern INT32 mmCreateNamedPipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); extern INT32 mmOpenNamePipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); extern VOID mmCloseNamedPipe(mmPipeHandle namedPipe[]); @@ -327,6 +331,7 @@ extern INT32 mmCreatePipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCoun extern INT32 mmOpenPipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); extern VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); +// Poll-related interfaces extern mmCompletionHandle mmCreateCompletionPort(); extern VOID mmCloseCompletionPort(mmCompletionHandle handle); extern INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP, pmmPollData polledData, @@ -388,12 +393,37 @@ extern
CHAR *mmDirName(CHAR *path); extern CHAR *mmBaseName(CHAR *path); extern INT32 mmGetDiskFreeSpace(const char *path, mmDiskSize *diskSize); +/* + * Function: set the thread name of a thread created by mmCreateTask + * Input: pstThreadHandle: thread ID + * name: thread name; the actual length of name must be < MMPA_THREADNAME_SIZE + * Returns EN_INVALID_PARAM on input parameter error, EN_OK on success, and + * EN_ERROR on failure + */ extern INT32 mmSetThreadName(mmThread *threadHandle, const CHAR *name); +/* + * Function: get thread name + * Input: pstThreadHandle: thread ID + * size: cache length of the thread name + * name: user-allocated cache for the thread name; cache length must be >= MMPA_THREADNAME_SIZE + * Returns EN_INVALID_PARAM on input parameter error, EN_OK on success, and + * EN_ERROR on failure + */ extern INT32 mmGetThreadName(mmThread *threadHandle, CHAR *name, INT32 size); - +/* + * Function: set the thread name of the currently executing thread - call inside the thread body + * Input: name: thread name to be set + * Returns EN_INVALID_PARAM on input parameter error, EN_OK on success, and + * EN_ERROR on failure + */ extern INT32 mmSetCurrentThreadName(const CHAR *name); - +/* + * Function: get the thread name of the currently executing thread - call inside the thread body + * Input: name: the name of the thread to get; the cache is allocated by the user, size >= MMPA_THREADNAME_SIZE + * Returns EN_INVALID_PARAM on input parameter error, EN_OK on success, and + * EN_ERROR on failure + */ extern INT32 mmGetCurrentThreadName(CHAR *name, INT32 size); extern INT32 mmGetFileSize(const CHAR *fileName, ULONGLONG *length); extern INT32 mmIsDir(const CHAR *fileName); @@ -413,6 +443,6 @@ extern INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBloc #if __cplusplus } #endif /* __cpluscplus */ -#endif /* __cpluscplus */ +#endif // __cplusplus -#endif /* _MMPA_LINUX_MMPA_LINUX_H_ */ +#endif // MMPA_LINUX_MMPA_LINUX_H_ diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h index 522e9e4f..fc862a72 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h @@ -20,8 +20,8 @@ #ifdef __cplusplus #if __cplusplus extern "C" { -#endif /* __cpluscplus */ -#endif /* __cpluscplus */ +#endif // __cplusplus +#endif // __cplusplus #ifndef FALSE #define FALSE 0 @@ -90,6 +90,6 @@ typedef long LONG; #ifdef __cplusplus #if __cplusplus } -#endif /* __cpluscplus */ -#endif /* __cpluscplus */ -#endif /* _MMPA_TYPEDEF_LINUX_H_ */ +#endif // __cplusplus +#endif // __cplusplus +#endif // MMPA_TYPEDEF_LINUX_H_ diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h index fe95db10..fc1b4858 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h @@ -20,8 +20,8 @@ #ifdef __cplusplus #if __cplusplus extern "C" { -#endif /* __cpluscplus */ -#endif /* __cpluscplus */ +#endif // __cplusplus +#endif // __cplusplus #ifndef FALSE #define FALSE 0 @@ -74,6 +74,6 @@ extern "C" { #ifdef __cplusplus #if __cplusplus } -#endif /* __cpluscplus */ -#endif /* __cpluscplus */ -#endif /* _MMPA_TYPEDEF_WIN_H_ */ +#endif // __cplusplus +#endif // __cplusplus +#endif //
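// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the patch): filling the
// mmThreadAttr fields documented in the mmpa_linux.h hunk above. The flag
// semantics (0 = leave unset, 1 = apply the value) follow those comments; the
// concrete values are placeholders.
static mmThreadAttr MakeRtThreadAttr() {
  mmThreadAttr attr = {};
  attr.detachFlag = 1;                 // detach the thread
  attr.priorityFlag = 1;               // apply the priority below
  attr.priority = 10;                  // documented valid range: 1-99
  attr.policyFlag = 1;                 // apply the scheduling policy below
  attr.policy = MMPA_THREAD_SCHED_RR;  // one of the documented policy values
  attr.stackFlag = 1;                  // apply the stack size below
  attr.stackSize = 512 * 1024;         // bytes; must not be below MMPA_THREAD_STACK_MIN
  return attr;
}
// ---------------------------------------------------------------------------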
_MMPA_TYPEDEF_WIN_H_ diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index 69185cba..ef15f371 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -19,8 +19,8 @@ #ifdef __cplusplus #if __cplusplus extern "C" { -#endif /* __cpluscplus */ -#endif /* __cpluscplus */ +#endif // __cplusplus +#endif // __cplusplus #ifdef MMPA_DLL #define MMPA_DLL_API __declspec(dllexport) #else @@ -75,7 +75,7 @@ typedef enum { typedef struct { unsigned char d_type; - char d_name[MAX_PATH]; + char d_name[MAX_PATH]; // file name } mmDirent; typedef int (*mmFilter)(const mmDirent *entry); @@ -88,9 +88,9 @@ typedef struct { typedef PVOID mmInAddr; typedef enum { - pollTypeRead = 1, - pollTypeRecv, - pollTypeIoctl, + pollTypeRead = 1, // pipe read + pollTypeRecv, // socket receive + pollTypeIoctl, // ioctl read } mmPollType; typedef struct { @@ -100,9 +100,9 @@ typedef struct { } mmComPletionKey, *pmmComPletionKey; typedef struct { - VOID *priv; - mmPollHandle bufHandle; - mmPollType bufType; + VOID *priv; // User defined private content + mmPollHandle bufHandle; // Value of handle corresponding to buf + mmPollType bufType; // Type of data being polled VOID *buf; UINT32 bufLen; UINT32 bufRes; @@ -110,10 +110,11 @@ typedef struct { typedef VOID (*mmPollBack)(pmmPollData); typedef struct { - mmPollHandle handle; - mmPollType pollType; - INT32 ioctlCode; - mmComPletionKey completionKey; + mmPollHandle handle; // The file descriptor or handle to poll + mmPollType pollType; // Operation type requiring poll: read, recv or ioctl + INT32 ioctlCode; // IOCTL operation code, dedicated to IOCTL + mmComPletionKey completionKey; // Defaults to blank; used on Windows to tell which handle the + // received data belongs to } mmPollfd; typedef struct { @@ -126,7 +127,7 @@ typedef OVERLAPPED mmOverLap; typedef struct { UINT32 createFlag; - INT32 oaFlag; + INT32 oaFlag; // Overlapped operation is supported if it is not 0 } mmCreateFlag; typedef struct { @@ -148,8 +149,8 @@ typedef struct { } mmTimeval; typedef struct { - INT32 tz_minuteswest; - INT32 tz_dsttime; + INT32 tz_minuteswest; // Minutes west of Greenwich + INT32 tz_dsttime; // DST correction type } mmTimezone; typedef struct { @@ -202,8 +203,9 @@ typedef struct { INT32 envpCount; } mmArgvEnv; +// Windows currently supports only the thread detach property typedef struct { - INT32 detachFlag; + INT32 detachFlag; // Thread detach property: 0 do not detach, 1 detach INT32 priorityFlag; INT32 priority; INT32 policyFlag; @@ -256,7 +258,7 @@ typedef VOID (*mmPf)(VOID); #define MMPA_FLAG_PERMUTE 0x01 // permute non-options to the end of argv #define MMPA_FLAG_ALLARGS 0x02 // treat non-options as args to option "-1" #define MMPA_FLAG_LONGONLY 0x04 // operate as getopt_long_only -/* return values */ +// return values #define MMPA_BADCH (INT32)'?' #define MMPA_BADARG ((*options == ':') ?
(INT32)':' : (INT32)'?') #define MMPA_INORDER (INT32)1 @@ -371,7 +373,6 @@ _declspec(dllexport) INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, m _declspec(dllexport) INT32 mmGetErrorCode(); _declspec(dllexport) INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone); _declspec(dllexport) mmTimespec mmGetTickCount(); - _declspec(dllexport) INT32 mmGetRealPath(CHAR *path, CHAR *realPath); _declspec(dllexport) INT32 mmRealPath(const CHAR *path, CHAR *realPath, INT32 realPathLen); @@ -422,8 +423,22 @@ _declspec(dllexport) INT32 mmGetDiskFreeSpace(const char *path, mmDiskSize *disk _declspec(dllexport) INT32 mmSetThreadName(mmThread *threadHandle, const CHAR *name); _declspec(dllexport) INT32 mmGetThreadName(mmThread *threadHandle, CHAR *name, INT32 size); +/* + * Function: set the thread name of the currently executing thread - called inside the thread body; + * not supported under Windows for now, so this is a null implementation. + * Input: name: the thread name to be set + * Returns EN_INVALID_PARAM on input parameter error, EN_OK on success, and + * EN_ERROR on failure + */ _declspec(dllexport) INT32 mmSetCurrentThreadName(const CHAR *name); +/* + * Function: get the thread name of the currently executing thread - called inside the thread body; + * not supported under Windows, null implementation. + * Input: name: the name of the thread to get; the cache is allocated by the user, size >= MMPA_THREADNAME_SIZE. + * Returns EN_INVALID_PARAM on input parameter error, + * EN_OK on success, and EN_ERROR on failure + */ _declspec(dllexport) INT32 mmGetCurrentThreadName(CHAR *name, INT32 size); _declspec(dllexport) INT32 mmGetFileSize(const CHAR *fileName, ULONGLONG *length); @@ -444,6 +459,6 @@ _declspec(dllexport) INT32 #if __cplusplus } #endif /* __cpluscplus */ -#endif /* __cpluscplus */ +#endif // __cplusplus -#endif /* _MMPA_WIN_MMPA_WIN_H_ */ +#endif // MMPA_WIN_MMPA_WIN_H_ diff --git a/third_party/fwkacllib/inc/ops/aipp.h b/third_party/fwkacllib/inc/ops/aipp.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/aipp_data.h b/third_party/fwkacllib/inc/ops/aipp_data.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/all_ops.h b/third_party/fwkacllib/inc/ops/all_ops.h old mode 100755 new mode 100644 index c00b2b8a..d6350322 --- a/third_party/fwkacllib/inc/ops/all_ops.h +++ b/third_party/fwkacllib/inc/ops/all_ops.h @@ -63,7 +63,4 @@ #include "stateless_random_ops.h" #include "dvpp_ops.h" #include "basic_lstm_cell.h" -#include "fsrdetectionoutput_ops.h" -#include "roipooling_ops.h" -#include "power_ops.h" #endif // BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/audio_ops.h b/third_party/fwkacllib/inc/ops/audio_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/basic_lstm_cell.h b/third_party/fwkacllib/inc/ops/basic_lstm_cell.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/batch_ops.h b/third_party/fwkacllib/inc/ops/batch_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/bitwise_ops.h b/third_party/fwkacllib/inc/ops/bitwise_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h old mode 100755 new mode 100644 diff --git
a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/clip_boxes.h b/third_party/fwkacllib/inc/ops/clip_boxes.h deleted file mode 100644 index 6ac07fd1..00000000 --- a/third_party/fwkacllib/inc/ops/clip_boxes.h +++ /dev/null @@ -1,34 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - #ifndef GE_OP_CLIP_BOXES_H - #define GE_OP_CLIP_BOXES_H - - #include "graph/operator_reg.h" - - namespace ge { - - - REG_OP(ClipBoxes) - .INPUT(boxes_input, TensorType({DT_FLOAT16})) - .OUTPUT(boxes_output, TensorType({DT_FLOAT16})) - .REQUIRED_ATTR(img_w, Float) - .REQUIRED_ATTR(img_h, Float) - .OP_END_FACTORY_REG(ClipBoxes) - - } // namespace ge - - #endif // GE_OP_CLIP_BOXES_H diff --git a/third_party/fwkacllib/inc/ops/control_flow_ops.h b/third_party/fwkacllib/inc/ops/control_flow_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/decode_bbox.h b/third_party/fwkacllib/inc/ops/decode_bbox.h deleted file mode 100644 index 9fe95488..00000000 --- a/third_party/fwkacllib/inc/ops/decode_bbox.h +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - #ifndef GE_OP_DECODE_BBOX_H - #define GE_OP_DECODE_BBOX_H - - #include "graph/operator_reg.h" - - namespace ge { - - REG_OP(DecodeBbox) - .INPUT(box_predictions, TensorType{DT_FLOAT16}) - .INPUT(anchors, TensorType{DT_FLOAT16}) - .OUTPUT(decoded_boxes, TensorType{DT_FLOAT16}) - .REQUIRED_ATTR(decode_clip, Float) - .OP_END_FACTORY_REG(DecodeBbox) - - } // namespace ge - - #endif // GE_OP_DECODE_BBOX_H diff --git a/third_party/fwkacllib/inc/ops/decode_boundaries_target.h b/third_party/fwkacllib/inc/ops/decode_boundaries_target.h deleted file mode 100644 index 42ad7b54..00000000 --- a/third_party/fwkacllib/inc/ops/decode_boundaries_target.h +++ /dev/null @@ -1,31 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - #ifndef GE_OP_DECODE_BOUNDARIES_TARGET_H - #define GE_OP_DECODE_BOUNDARIES_TARGET_H - - #include "graph/operator_reg.h" - - namespace ge { - - REG_OP(DecodeBoundariesTarget) - .INPUT(boundary_predictions, TensorType({DT_FLOAT16})) /* "First operand." */ - .INPUT(anchors, TensorType({DT_FLOAT16})) /* "Second operand." */ - .OUTPUT(boundary_encoded, TensorType({DT_FLOAT16})) /* "Result, has same element type as two inputs" */ - .OP_END_FACTORY_REG(DecodeBoundariesTarget) - } // namespace ge - - #endif // GE_OP_DECODE_BOUNDARIES_TARGET_H diff --git a/third_party/fwkacllib/inc/ops/decode_cornerpoints_target_bg.h b/third_party/fwkacllib/inc/ops/decode_cornerpoints_target_bg.h deleted file mode 100755 index ce10175f..00000000 --- a/third_party/fwkacllib/inc/ops/decode_cornerpoints_target_bg.h +++ /dev/null @@ -1,31 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - #ifndef GE_OP_DECODE_CORNERPOINTS_TARGET_BG_H - #define GE_OP_DECODE_CORNERPOINTS_TARGET_BG_H - - #include "graph/operator_reg.h" - - namespace ge { - - REG_OP(DecodeCornerpointsTargetBG) - .INPUT(keypoints_prediction, TensorType({DT_FLOAT16})) /* "First operand." */ - .INPUT(anchors, TensorType({DT_FLOAT16})) /* "Second operand." */ - .OUTPUT(keypoints_decoded, TensorType({DT_FLOAT16})) /* "Result, has same element type as two inputs" */ - .OP_END_FACTORY_REG(DecodeCornerpointsTargetBG); - } // namespace ge - - #endif // GE_OP_DECODE_CORNERPOINTS_TARGET_BG_H diff --git a/third_party/fwkacllib/inc/ops/decode_cornerpoints_target_wrt_center_v1.h b/third_party/fwkacllib/inc/ops/decode_cornerpoints_target_wrt_center_v1.h deleted file mode 100755 index 0e96bc16..00000000 --- a/third_party/fwkacllib/inc/ops/decode_cornerpoints_target_wrt_center_v1.h +++ /dev/null @@ -1,32 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - #ifndef GE_OP_DECODE_CORNERPOINTS_TARGET_WRT_CENTER_V1_H - #define GE_OP_DECODE_CORNERPOINTS_TARGET_WRT_CENTER_V1_H - - #include "graph/operator_reg.h" - - namespace ge { - - REG_OP(DecodeCornerpointsTargetWrtCenterV1) - .INPUT(keypoints_prediction, TensorType({DT_FLOAT16})) /* "First operand." */ - .INPUT(anchors, TensorType({DT_FLOAT16})) /* "Second operand." */ - .OUTPUT(keypoints_decoded, TensorType({DT_FLOAT16})) /* "Result, has same element type as two inputs" */ - .OP_END_FACTORY_REG(DecodeCornerpointsTargetWrtCenterV1) - } // namespace ge - - #endif // GE_OP_DECODE_CORNERPOINTS_TARGET_WRT_CENTER_V1_H - diff --git a/third_party/fwkacllib/inc/ops/decode_wheels_target.h b/third_party/fwkacllib/inc/ops/decode_wheels_target.h deleted file mode 100644 index 053a6c1a..00000000 --- a/third_party/fwkacllib/inc/ops/decode_wheels_target.h +++ /dev/null @@ -1,31 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - #ifndef GE_OP_DECODE_WHEELS_TARGET_H - #define GE_OP_DECODE_WHEELS_TARGET_H - - #include "graph/operator_reg.h" - - namespace ge { - - REG_OP(DecodeWheelsTarget) - .INPUT(boundary_predictions, TensorType({DT_FLOAT16})) /* "First operand." */ - .INPUT(anchors, TensorType({DT_FLOAT16})) /* "Second operand." */ - .OUTPUT(boundary_encoded, TensorType({DT_FLOAT16})) /* "Result, has same element type as two inputs" */ - .OP_END_FACTORY_REG(DecodeWheelsTarget) - } // namespace ge - - #endif // GE_OP_DECODE_WHEELS_TARGET_H diff --git a/third_party/fwkacllib/inc/ops/dvpp_ops.h b/third_party/fwkacllib/inc/ops/dvpp_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index 6b433acf..ba043d5a 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -37,6 +37,29 @@ REG_OP(AddN) .REQUIRED_ATTR(N, Int) .OP_END_FACTORY_REG(AddN) +/** +*@brief Calculates the reversed outputs of the function "maximum" + +*@par Inputs: +*Three inputs, including: +* @li grads: A mutable Tensor. Must be one of the following types: +* float16, float32, int32. +* @li x1: A mutable Tensor of the same type as "grads". +* @li x2: A mutable Tensor of the same type as "grads". + +*@par Attributes: +*@li grad_x: An optional bool. Defaults to "True". +* If "True", "y1" will be output. +* If "False", "y1" will not be output. + +*@li grad_y: An optional bool. Defaults to "True". +* If "True", "y2" will be output. +* If "False", "y2" will not be output. + +*@par Outputs: +* @li y1: A mutable Tensor. Has the same type as "grads". +* @li y2: A mutable Tensor. Has the same type as "grads". 
+*/ REG_OP(MaximumGrad) .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -134,6 +157,18 @@ REG_OP(GreaterEqual) .OUTPUT(y, TensorType({DT_BOOL})) .OP_END_FACTORY_REG(GreaterEqual) +/** +*@brief Returns the truth value of (x1 < x2) element-wise. + +*@par Inputs: +*Two inputs, including: +* @li x1: A Tensor. Must be one of the following types: float16, float32, double, int32, +* uint8, int16, int8, int64, uint16, uint32, uint64. +* @li x2: A Tensor with the same type as "x1". + +*@par Outputs: +*y: A Tensor of type bool. +*/ REG_OP(Less) .INPUT(x1, TensorType::RealNumberType()) .INPUT(x2, TensorType::RealNumberType()) @@ -237,6 +272,17 @@ REG_OP(Reciprocal) DT_COMPLEX64, DT_COMPLEX128})) .OP_END_FACTORY_REG(Reciprocal) +/** +*@brief Returns x1 - x2 element-wise. +*@par Inputs: +*Two inputs, including: +* @li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, float64, +* float16, float32, complex128, complex64, uint16. +* @li x2: A Tensor of the same type as "x1". + +*@par Outputs: +*y: A Tensor. Has the same type as "x1". +*/ REG_OP(Sub) .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, DT_UINT16, DT_INT16, DT_INT32, DT_INT64, @@ -249,6 +295,16 @@ REG_OP(Sub) DT_COMPLEX64, DT_COMPLEX128})) .OP_END_FACTORY_REG(Sub) +/** +*@brief Computes the absolute value of a tensor. + +*@par Inputs: +*One input, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. + +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*/ REG_OP(Abs) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) @@ -930,6 +986,19 @@ REG_OP(Add) DT_COMPLEX64, DT_STRING})) .OP_END_FACTORY_REG(Add) +/** +*@brief Fuses broadcast, add and mul. + +*@par Inputs: +*Three inputs, including: +* @li x1: A Tensor. Must be one of the following types: int32, float16, float32. +* @li x2: A Tensor of the same type as "x1". +* @li x3: A Tensor of the same type as "x1". + +*@par Outputs: +*@li y: A Tensor. Has the same type as "x1". +*/ + REG_OP(FusedMulAdd) .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -1651,6 +1720,15 @@ REG_OP(Ceil) .OUTPUT(y, TensorType::FloatingDataType()) .OP_END_FACTORY_REG(Ceil) +/** +*@brief Returns element-wise largest integer not greater than "x". + +*@par Inputs: +*x: A Tensor of type float16 or float32. + +*@par Outputs: +*y: A Tensor of the same type as "x". +*/ REG_OP(Floor) .INPUT(x, TensorType::FloatingDataType()) .OUTPUT(y, TensorType::FloatingDataType()) @@ -1970,6 +2048,22 @@ REG_OP(ArgMinWithValue) .ATTR(keep_dims, Bool, false) .OP_END_FACTORY_REG(ArgMinWithValue) +/** +*@brief Computes an element-wise operation over all inputs; mode 0: PRODUCT, 1: SUM, 2: MAX. + +*@par Inputs: +*One input: \n +*x: the list of input data; each element is a Tensor that \n +* must be one of the following types: +* float16, float32 + +*@par Attributes: +*@li mode: An optional attribute. Defaults to "1". +* "0": product, "1": sum, "2": max. +*@li coeff: A required attribute. Must meet all of the following rules: +* size of "coeff" must be equal to len("x"). +* element of "coeff" must be a number.
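// ---------------------------------------------------------------------------
// Editor's reference sketch (not part of the patch) for the Eltwise doc above:
// mode 0 = product, 1 = sum, 2 = max, with one coeff per input. Applying coeff
// only in sum mode mirrors the Caffe operator this appears to follow; that
// scoping is an assumption.
#include <algorithm>
#include <cassert>
#include <vector>

std::vector<float> EltwiseRef(const std::vector<std::vector<float>> &xs,
                              const std::vector<float> &coeff, int mode) {
  assert(!xs.empty() && coeff.size() == xs.size());  // size of "coeff" must equal len("x")
  std::vector<float> y(xs[0].size(), mode == 0 ? 1.0f : 0.0f);
  if (mode == 2) y = xs[0];  // max starts from the first input
  for (size_t i = 0; i < xs.size(); ++i) {
    for (size_t j = 0; j < y.size(); ++j) {
      if (mode == 0) y[j] *= xs[i][j];                  // product
      else if (mode == 1) y[j] += coeff[i] * xs[i][j];  // weighted sum
      else y[j] = std::max(y[j], xs[i][j]);             // max
    }
  }
  return y;
}
// ---------------------------------------------------------------------------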
+*/ REG_OP(Eltwise) .DYNAMIC_INPUT(__input, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1991,6 +2085,33 @@ REG_OP(PopulationCount) .OUTPUT(y, TensorType({DT_UINT8})) .OP_END_FACTORY_REG(PopulationCount) +/** +*@brief A fusion operator for bert lamb. + +*@par Inputs: +*Thirteen inputs, including: +* @li input1: A Tensor. Must be one of the following types: float16, float32. +* @li input2: A Tensor. Must be one of the following types: float16, float32. +* @li input3: A Tensor. Must be one of the following types: float16, float32. +* @li input4: A Tensor. Must be one of the following types: float16, float32. +* @li input5: A Tensor. Must be one of the following types: float16, float32. +* @li input6: A Tensor. Must be one of the following types: float16, float32. +* @li input7: A Tensor. Must be one of the following types: float16, float32. +* @li input8: A Tensor. Must be one of the following types: float16, float32. +* @li input9: A Tensor. Must be one of the following types: float16, float32. +* @li inputx0: A Tensor. Must be one of the following types: float16, float32. +* @li inputx1: A Tensor. Must be one of the following types: float16, float32. +* @li inputx2: A Tensor. Must be one of the following types: float16, float32. +* @li inputx3: A Tensor. Must be one of the following types: float16, float32. + +*@par Outputs: +*Four outputs, including: +* @li output1: A Tensor. Must be one of the following types: float16, float32. +* @li output2: A Tensor. Must be one of the following types: float16, float32. +* @li output3: A Tensor. Must be one of the following types: float16, float32. +* @li output4: A Tensor. Must be one of the following types: float16, float32. + +*/ REG_OP(LambNextMVWithDecay) .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2011,6 +2132,33 @@ REG_OP(LambNextMVWithDecay) .OUTPUT(output4, TensorType({DT_FLOAT16,DT_FLOAT})) .OP_END_FACTORY_REG(LambNextMVWithDecay) +/** +*@brief A fusion operator for bert lamb. + +*@par Inputs: +*Thirteen inputs, including: +* @li input1: A Tensor. Must be one of the following types: float16, float32. +* @li input2: A Tensor. Must be one of the following types: float16, float32. +* @li input3: A Tensor. Must be one of the following types: float16, float32. +* @li input4: A Tensor. Must be one of the following types: float16, float32. +* @li input5: A Tensor. Must be one of the following types: float16, float32. +* @li input6: A Tensor. Must be one of the following types: float16, float32. +* @li input7: A Tensor. Must be one of the following types: float16, float32. +* @li input8: A Tensor. Must be one of the following types: float16, float32. +* @li input9: A Tensor. Must be one of the following types: float16, float32. +* @li inputx0: A Tensor. Must be one of the following types: float16, float32. +* @li inputx1: A Tensor. Must be one of the following types: float16, float32. +* @li inputx2: A Tensor. Must be one of the following types: float16, float32. +* @li inputx3: A Tensor. Must be one of the following types: float16, float32. + +*@par Outputs: +*Four outputs, including: +* @li output1: A Tensor. Must be one of the following types: float16, float32. +* @li output2: A Tensor. Must be one of the following types: float16, float32. +* @li output3: A Tensor. Must be one of the following types: float16, float32. +* @li output4: A Tensor. Must be one of the following types: float16, float32. 
+ +*/ REG_OP(LambNextMVWithDecayV1) .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2031,6 +2179,32 @@ REG_OP(LambNextMVWithDecayV1) .OUTPUT(output4, TensorType({DT_FLOAT16,DT_FLOAT})) .OP_END_FACTORY_REG(LambNextMVWithDecayV1) +/** +*@brief Confuse real_div, rsqrt, sqrt, maximum, minimum, sub and add. + +*@par Inputs: +*Thirteen inputs, including: +* @li input1: A Tensor. Must be one of the following types: float16, float32. +* @li input2: A Tensor of the same type as "input1". +* @li input3: A Tensor of the same type as "input1". +* @li input4: A Tensor of the same type as "input1". +* @li input5: A Tensor of the same type as "input1". +* @li input6: A Tensor. Must be one of the following types: float16, float32. +* @li input7: A Tensor of the same type as "input1". +* @li input8: A Tensor of the same type as "input1". +* @li input9: A Tensor of the same type as "input1". +* @li inputx0: A Tensor of the same type as "input1". +* @li inputx1: A Tensor. Must be one of the following types: float16, float32. +* @li inputx2: A Tensor of the same type as "input1". +* @li inputx3: A Tensor of the same type as "input1". + +*@par Outputs: +*Four outputs, including: +*@li output1: A Tensor. Has the same type as "input1". +*@li output2: A Tensor. Has the same type as "input1". +*@li output3: A Tensor. Has the same type as "input1". +*@li output4: A Tensor. Has the same type as "input1". +*/ REG_OP(LambNextMV) .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2051,6 +2225,24 @@ REG_OP(LambNextMV) .OUTPUT(output4, TensorType({DT_FLOAT16,DT_FLOAT})) .OP_END_FACTORY_REG(LambNextMV) +/** +*@brief A fusion operator for bert lamb. + +*@par Inputs: +*Six inputs, including: +* @li input1: A Tensor. Must be one of the following types: float16, float32. +* @li input2: A Tensor. Must be one of the following types: float16, float32. +* @li mul2_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul3_x: A Tensor. Must be one of the following types: float16, float32. +* @li truediv1_recip: A Tensor. Must be one of the following types: float16, float32. +* @li add2_y: A Tensor. Must be one of the following types: float16, float32. + +*@par Outputs: +*Two outputs, including: +* @li output1: A Tensor of the same type as "input1". +* @li output2: A Tensor of the same type as "input1". + +*/ REG_OP(LambNextRight) .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2062,6 +2254,22 @@ REG_OP(LambNextRight) .OUTPUT(output2, TensorType({DT_FLOAT16,DT_FLOAT})) .OP_END_FACTORY_REG(LambNextRight) +/** +*@brief Confuse broadcast, add and mul. + +*@par Inputs: +*Five inputs, including: +* @li input1: A Tensor. Must be one of the following types: float16, float32. +* @li input2: A Tensor of the same type as "input1". +* @li mul_x: A Tensor of the same type as "input1". +* @li mul1_x: A Tensor of the same type as "input1". +* @li truediv_recip: A Tensor of the same type as "input1". + +*@par Outputs: +*Two outputs, including: +*@li output1: A Tensor. Has the same type as "input1". +*@li output2: A Tensor. Has the same type as "input1". +*/ REG_OP(LambNextLeft) .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2072,6 +2280,25 @@ REG_OP(LambNextLeft) .OUTPUT(output2, TensorType({DT_FLOAT16,DT_FLOAT})) .OP_END_FACTORY_REG(LambNextLeft) +/** +*@brief A fusion operator for bert lamb. 
+ +*@par Inputs: +*Nine inputs, including: +* @li input1: A Tensor. Must be one of the following types: float16, float32. +* @li input2: A Tensor. Must be one of the following types: float16, float32. +* @li input3: A Tensor. Must be one of the following types: float16, float32. +* @li input4: A Tensor. Must be one of the following types: float16, float32. +* @li input5: A Tensor. Must be one of the following types: float16, float32. +* @li input6: A Tensor. Must be one of the following types: float16, float32. +* @li input7: A Tensor. Must be one of the following types: float16, float32. +* @li input8: A Tensor. Must be one of the following types: float16, float32. +* @li input9: A Tensor. Must be one of the following types: float16, float32. + +*@par Outputs: +*output_y: A Tensor of the same type as "input1". + +*/ REG_OP(LambUpdateWithLr) .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2085,6 +2312,23 @@ REG_OP(LambUpdateWithLr) .OUTPUT(output_y, TensorType({DT_FLOAT16,DT_FLOAT})) .OP_END_FACTORY_REG(LambUpdateWithLr) +/** +*@brief A fusion operator for bert lamb. + +*@par Inputs: +*Seven inputs, including: +* @li x1: A Tensor. Must be one of the following types: float16, float32. +* @li x2: A Tensor. Must be one of the following types: float16, float32. +* @li x3: A Tensor. Must be one of the following types: float16, float32. +* @li x4: A Tensor. Must be one of the following types: float16, float32. +* @li x5: A Tensor. Must be one of the following types: float16, float32. +* @li greater_y: A Tensor. Must be one of the following types: float16, float32. +* @li select_e: A Tensor. Must be one of the following types: float16, float32. + +*@par Outputs: +*y: A Tensor of the same type as the inputs. + +*/ REG_OP(LambUpdateWithLrV2) .INPUT(x1, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(x2, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2096,6 +2340,30 @@ REG_OP(LambUpdateWithLrV2) .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) .OP_END_FACTORY_REG(LambUpdateWithLrV2) +/** +*@brief A fusion operator for bert lamb. + +*@par Inputs: +*Eleven inputs, including: +* @li input0: A Tensor. Must be one of the following types: float16, float32. +* @li input1: A Tensor. Must be one of the following types: float16, float32. +* @li input2: A Tensor. Must be one of the following types: float16, float32. +* @li input3: A Tensor. Must be one of the following types: float16, float32. +* @li input4: A Tensor. Must be one of the following types: float16, float32. +* @li mul0_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul1_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul2_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul3_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul4_x: A Tensor. Must be one of the following types: float16, float32. +* @li add2_y: A Tensor. Must be one of the following types: float16, float32. + +*@par Outputs: +*Three outputs, including: +* @li output0: A Tensor. Must be one of the following types: float16, float32. +* @li output1: A Tensor. Must be one of the following types: float16, float32. +* @li output2: A Tensor. Must be one of the following types: float16, float32.
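+
+* One way to instantiate this fused op from its registered type string is the
+* generic operator factory; a hypothetical construction sketch (the node name
+* and the upstream operator "grad_op" are illustrative assumptions):
+* @code
+* #include "graph/operator_factory.h"
+* ge::Operator adam = ge::OperatorFactory::CreateOperator(
+*     "adam_apply_one_with_decay_0", "AdamApplyOneWithDecay");
+* adam.SetInput("input0", grad_op);  // wire the remaining inputs the same way
+* @endcode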
+ +*/ REG_OP(AdamApplyOneWithDecay) .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2113,6 +2381,29 @@ REG_OP(AdamApplyOneWithDecay) .OUTPUT(output2, TensorType({DT_FLOAT16,DT_FLOAT})) .OP_END_FACTORY_REG(AdamApplyOneWithDecay) +/** +*@brief A fusion operator for bert lamb. + +*@par Inputs: +*Ten inputs, including: +* @li input0: A Tensor. Must be one of the following types: float16, float32. +* @li input1: A Tensor. Must be one of the following types: float16, float32. +* @li input2: A Tensor. Must be one of the following types: float16, float32. +* @li input3: A Tensor. Must be one of the following types: float16, float32. +* @li input4: A Tensor. Must be one of the following types: float16, float32. +* @li mul0_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul1_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul2_x: A Tensor. Must be one of the following types: float16, float32. +* @li mul3_x: A Tensor. Must be one of the following types: float16, float32. +* @li add2_y: A Tensor. Must be one of the following types: float16, float32. + +*@par Outputs: +*Three outputs, including: +* @li output0: A Tensor. Must be one of the following types: float16, float32. +* @li output1: A Tensor. Must be one of the following types: float16, float32. +* @li output2: A Tensor. Must be one of the following types: float16, float32. + +*/ REG_OP(AdamApplyOne) .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2129,6 +2420,20 @@ REG_OP(AdamApplyOne) .OUTPUT(output2, TensorType({DT_FLOAT16,DT_FLOAT})) .OP_END_FACTORY_REG(AdamApplyOne) +/** +*@brief Confuse select, maximum, greater and sqrt. + +*@par Inputs: +*Four inputs, including: +* @li input_x: A Tensor. Must be one of the following types: float16, float32. +* @li input1: A Tensor. Must be one of the following types: float16, float32. +* @li input2: A Tensor. Must be one of the following types: float16, float32. +* @li input3: A Tensor. Must be one of the following types: float16, float32. + +*@par Outputs: +*output_y: A Tensor of the same type as "input_x". + +*/ REG_OP(ClipByNormNoDivSum) .INPUT(input_x, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2137,6 +2442,22 @@ REG_OP(ClipByNormNoDivSum) .OUTPUT(output_y, TensorType({DT_FLOAT16,DT_FLOAT})) .OP_END_FACTORY_REG(ClipByNormNoDivSum) +/** +*@brief Confuse reducesumd and square. + +*@par Inputs: +*x: A Tensor of type float16, float32. + +*@par Attributes: +* Two attributes, including: \n +*@li axis: An optional list of ints, specifying the dimensions to reduce. +*@li keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false". + +*@par Outputs: +*Two outputs, including: \n +*@li y1: A Tensor. Has the same type as "x". +*@li y2: A Tensor. Has the same type as "x". +*/ REG_OP(SquareSumV2) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) .OUTPUT(y1, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2144,6 +2465,21 @@ REG_OP(SquareSumV2) .ATTR(axis, ListInt, {}) .ATTR(keep_dims, Bool, false) .OP_END_FACTORY_REG(SquareSumV2) + +/** +*@brief Confuse reducesumd and square. + +*@par Inputs: +*x: A Tensor of type float16, float32. + +*@par Attributes: +* Two attributes, including: \n +*@li axis: An optional list of ints, specifying the dimensions to reduce. +*@li keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false". + +*@par Outputs: y: A Tensor.
Has the same type as "x". +*/ REG_OP(SquareSumV1) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2151,13 +2487,25 @@ REG_OP(SquareSumV1) .ATTR(keep_dims, Bool, false) .OP_END_FACTORY_REG(SquareSumV1) +/** +*@brief Confuse broadcast, addn and mul. + +*@par Inputs: +*Three inputs, including: +* @li x1: A Tensor. Must be one of the following types: int32, float16, float32. +* @li x2: A Tensor of the same type as "x1". +* @li x3: A Tensor of the same type as "x1". + +*@par Outputs: +*@li y: A Tensor. Has the same type as "x1". +*/ REG_OP(FusedMulAddN) .INPUT(x1, TensorType::NumberType()) .INPUT(x2, TensorType::NumberType()) .INPUT(x3, TensorType::NumberType()) .OUTPUT(y, TensorType::NumberType()) .OP_END_FACTORY_REG(FusedMulAddN) - + /** *@brief Add 'bias' to 'x'. @@ -2230,6 +2578,16 @@ REG_OP(LRNGrad) .ATTR(beta, Float, 0.5) .OP_END_FACTORY_REG(LRNGrad) +REG_OP(ConfusionMulGrad) + .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(output1, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(axis, ListInt, {}) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(ConfusionMulGrad) + REG_OP(LRN) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/fsrdetectionoutput_ops.h b/third_party/fwkacllib/inc/ops/fsrdetectionoutput_ops.h deleted file mode 100755 index 8fe21d37..00000000 --- a/third_party/fwkacllib/inc/ops/fsrdetectionoutput_ops.h +++ /dev/null @@ -1,39 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#ifndef GE_OP_FSRDETECTIONOUTPUT_OPS_H_ -#define GE_OP_FSRDETECTIONOUTPUT_OPS_H_ -#include "graph/operator_reg.h" - -namespace ge { -REG_OP(FSRDetectionOutput) - .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(prior_box, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(score, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(actual_rois_num, TensorType({DT_INT32})) - .OUTPUT(actual_bbox_num, TensorType({DT_INT32})) - .OUTPUT(box, TensorType({DT_FLOAT, DT_FLOAT16})) - .ATTR(batch_rois, Int, 1024) - .ATTR(im_info, ListInt, {375,1024}) - .ATTR(num_classes, Int, 80) - .ATTR(max_rois_num, Int, 1024) - .ATTR(score_thresh, Float, 0.45) - .ATTR(nms_thresh, Float, 0.7) - .ATTR(bbox_reg_weights, ListInt, {1,1,1,1}) - .ATTR(post_nms_topn, Int, 304) - .OP_END_FACTORY_REG(FSRDetectionOutput) -} -#endif diff --git a/third_party/fwkacllib/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/ops/functional_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/get_data_ops.h b/third_party/fwkacllib/inc/ops/get_data_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/hcom_ops.h b/third_party/fwkacllib/inc/ops/hcom_ops.h index c2a50308..598f3d11 100644 --- a/third_party/fwkacllib/inc/ops/hcom_ops.h +++ b/third_party/fwkacllib/inc/ops/hcom_ops.h @@ -1,195 +1,196 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef GE_OP_HCOM_OPS_H_ -#define GE_OP_HCOM_OPS_H_ - -#include "graph/operator_reg.h" - -namespace ge { -/** - * @brief Outputs a tensor gathering all input tensors. - * @par Inputs: - * x: A tensor. Must be one of the following types: int8, int32, float16, - * float32. - * @par Attributes: - * @li rank_size: An integer identifying the number of ranks participating in - * the op. - * @li group: A string identifying the group name of ranks participating in - * the op. - * @par Outputs: - * y: A Tensor. Has the same type as "x". - * @attention Constraints:\n - * "group" is limited to 128 characters. Use "hccl_world_group" - * as the name of a world group. - */ -REG_OP(HcomAllGather) - .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) - .REQUIRED_ATTR(rank_size, Int) - .REQUIRED_ATTR(group, String) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) - .OP_END_FACTORY_REG(HcomAllGather) - -/** - * @brief Outputs a tensor containing the reduction across all input tensors - * passed to op. - * @par Inputs: - * x: A tensor. Must be one of the following types: int8, int32, float16, - * float32. - * @par Attributes: - * @li reduction: A string identifying the reduction operation to perform. \n - * The supported operation are: "sum", "max", "min", "prod". - * @li group: A string identifying the group name of ranks participating in - * the op. - * @li fusion: An optional integer identifying the fusion flag of the op. \n - * 0: no fusion; other (default): fusion. 
- * @par Outputs: - * y: A Tensor. Has the same type as "x". - * @attention Constraints: \n - * "group" is limited to 128 characters. Use "hccl_world_group" - * as the name of a world group. - */ -REG_OP(HcomAllReduce) - .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) - .REQUIRED_ATTR(reduction, String) - .REQUIRED_ATTR(group, String) - .ATTR(fusion, Int, 1) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) - .OP_END_FACTORY_REG(HcomAllReduce) - -/** - * @brief Broadcasts the input tensor in root rank to all ranks. - * @par Inputs: - * x: A tensor. Must be one of the following types: int8, int32, float16, - * float32. - * @par Attributes: - * @li root_rank: An integer identifying the root rank in the op input of - * this rank will be broadcast to other ranks. - * @li group: A string identifying the group name of ranks participating in - * the op. - * @par Outputs: - * y: A Tensor. Has the same type as "x". - * @attention Constraints:\n - * "group" is limited to 128 characters. Use "hccl_world_group" - * as the name of a world group. - */ -REG_OP(HcomBroadcast) - .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) - .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) - .REQUIRED_ATTR(root_rank, Int) - .REQUIRED_ATTR(group, String) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) - .OP_END_FACTORY_REG(HcomBroadcast) - -/** - * @brief Performs reduction across all input tensors, scattering in equal - * blocks among ranks, each rank getting a chunk of data based on its rank - * index. - * @par Inputs: - * x: A tensor. Must be one of the following types: int8, int32, float16, - * float32. - * @par Attributes: - * @li reduction: A string identifying the reduction operation to perform. \n - * The supported operation are: "sum", "max", "min", "prod". - * @li group: A string identifying the group name of ranks participating in - * the op. - * @li rank_size: An integer identifying the number of ranks participating in - * the op. - * @par Outputs: - * y: A Tensor. Has the same type as "x". - * @attention Constraints:\n - * "group" is limited to 128 characters. Use "hccl_world_group" - * as the name of a world group. - */ -REG_OP(HcomReduceScatter) - .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) - .REQUIRED_ATTR(reduction, String) - .REQUIRED_ATTR(group, String) - .REQUIRED_ATTR(rank_size, Int) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) - .OP_END_FACTORY_REG(HcomReduceScatter) - -/** - * @brief Sends the input tensor to destination rank. - * @par Inputs: - * x: A tensor. Must be one of the following types: int8, int32, float16, - * float32. - * @par Attributes: - * @li sr_tag: An integer identifying the send/recv message tag. The message - * will be received by the HcomReceive op with the same "sr_tag". - * @li dest_rank: An integer identifying the destination rank. - * @li group: A string identifying the group name of ranks participating in - * the op. - * @par Outputs: - * None. - * @attention Constraints:\n - * @li "group" is limited to 128 characters. Use - * "hccl_world_group" as the name of a world group. - * @li Operators HcomSend and HcomReceive have the same "sr_tag". 
- * @see HcomReceive -*/ -REG_OP(HcomSend) - .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) - .REQUIRED_ATTR(group, String) - .REQUIRED_ATTR(sr_tag, Int) - .REQUIRED_ATTR(dest_rank, Int) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) - .OP_END_FACTORY_REG(HcomSend) - -/** - * @brief Receives the tensor from source rank. - * @par Inputs: - * None. - * @par Attributes: - * @li sr_tag: An integer identifying the send/recv message tag. The message - * will be send by the HcomSend op with the same "sr_tag". - * @li src_rank: An integer identifying the source rank. - * @li group: A string identifying the group name of ranks participating in - * the op. - * @li shape: A list identifying the shape of the tensor to be received. - * @li dtype: An integer identifying the type of the tensor to be received. \n - * The supported types are: int8, int32, float16, float32. - * @par Outputs: - * y: A tensor with type identified in "dtype". - * @attention Constraints:\n - * @li "group" is limited to 128 characters. Use - * "hccl_world_group" as the name of a world group. - * @li Operators HcomSend and HcomReceive have the same "sr_tag". - * @li "shape" should be same as the input tensor of HcomSend. - * @li "dtype" should be same as the input tensor of HcomSend. - * @see HcomSend -*/ -REG_OP(HcomReceive) - .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) - .REQUIRED_ATTR(group, String) - .REQUIRED_ATTR(sr_tag, Int) - .REQUIRED_ATTR(src_rank, Int) - .REQUIRED_ATTR(shape, ListInt) - .REQUIRED_ATTR(dtype, Type) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) - .OP_END_FACTORY_REG(HcomReceive) - -} -#endif +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_OP_HCOM_OPS_H_ +#define GE_OP_HCOM_OPS_H_ + +#include "graph/operator_reg.h" + +namespace ge { +/** + * @brief Outputs a tensor gathering all input tensors. + * @par Inputs: + * x: A tensor. Must be one of the following types: int8, int32, float16, + * float32. + * @par Attributes: + * @li rank_size: A required integer identifying the number of ranks + * participating in the op. + * @li group: A required string identifying the group name of ranks + * participating in the op. + * @par Outputs: + * y: A Tensor. Has the same type as "x". + * @attention Constraints:\n + * "group" is limited to 128 characters. Use "hccl_world_group" + * as the name of a world group. + */ +REG_OP(HcomAllGather) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) + .REQUIRED_ATTR(rank_size, Int) + .REQUIRED_ATTR(group, String) + .ATTR(alpha, Float, 1.0) + .ATTR(beta, Float, 0.0) + .OP_END_FACTORY_REG(HcomAllGather) + +/** + * @brief Outputs a tensor containing the reduction across all input tensors + * passed to op. + * @par Inputs: + * x: A tensor. Must be one of the following types: int8, int32, float16, + * float32. 
+ * @par Attributes: + * @li reduction: A required string identifying the reduction operation to + * perform. The supported operations are: "sum", "max", "min", "prod". + * @li group: A required string identifying the group name of ranks + * participating in the op. + * @li fusion: An optional integer identifying the fusion flag of the op. \n + * 0: no fusion; 1 (default): fusion. + * @par Outputs: + * y: A Tensor. Has the same type as "x". + * @attention Constraints: \n + * "group" is limited to 128 characters. Use "hccl_world_group" + * as the name of a world group. + */ +REG_OP(HcomAllReduce) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) + .REQUIRED_ATTR(reduction, String) + .REQUIRED_ATTR(group, String) + .ATTR(fusion, Int, 1) + .ATTR(alpha, Float, 1.0) + .ATTR(beta, Float, 0.0) + .OP_END_FACTORY_REG(HcomAllReduce) + +/** + * @brief Broadcasts the input tensor in root rank to all ranks. + * @par Inputs: + * x: A list of dynamic input tensors. Must be one of the following types: + * int8, int32, float16, float32. + * @par Attributes: + * @li root_rank: A required integer identifying the root rank in the op. The + * input of this rank will be broadcast to other ranks. + * @li group: A required string identifying the group name of ranks + * participating in the op. + * @par Outputs: + * y: A list of dynamic output tensors. Has the same type and length as "x". + * @attention Constraints:\n + * "group" is limited to 128 characters. Use "hccl_world_group" + * as the name of a world group. + */ +REG_OP(HcomBroadcast) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) + .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) + .REQUIRED_ATTR(root_rank, Int) + .REQUIRED_ATTR(group, String) + .ATTR(alpha, Float, 1.0) + .ATTR(beta, Float, 0.0) + .OP_END_FACTORY_REG(HcomBroadcast) + +/** + * @brief Performs reduction across all input tensors, scattering in equal + * blocks among ranks, each rank getting a chunk of data based on its rank + * index. + * @par Inputs: + * x: A tensor. Must be one of the following types: int8, int32, float16, + * float32. + * @par Attributes: + * @li reduction: A required string identifying the reduction operation to + * perform. The supported operations are: "sum", "max", "min", "prod". + * @li group: A required string identifying the group name of ranks + * participating in the op. + * @li rank_size: A required integer identifying the number of ranks + * participating in the op. + * @par Outputs: + * y: A Tensor. Has the same type as "x". + * @attention Constraints:\n + * "group" is limited to 128 characters. Use "hccl_world_group" + * as the name of a world group. + */ +REG_OP(HcomReduceScatter) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) + .REQUIRED_ATTR(reduction, String) + .REQUIRED_ATTR(group, String) + .REQUIRED_ATTR(rank_size, Int) + .ATTR(alpha, Float, 1.0) + .ATTR(beta, Float, 0.0) + .OP_END_FACTORY_REG(HcomReduceScatter) + +/** + * @brief Sends the input tensor to destination rank. + * @par Inputs: + * x: A tensor. Must be one of the following types: int8, int32, float16, + * float32. + * @par Attributes: + * @li sr_tag: A required integer identifying the send/recv message tag. The + * message will be received by the HcomReceive op with the same "sr_tag". + * @li dest_rank: A required integer identifying the destination rank.
+ * @li group: A required string identifying the group name of ranks + * participating in the op. + * @par Outputs: + * None. + * @attention Constraints:\n + * @li "group" is limited to 128 characters. Use + * "hccl_world_group" as the name of a world group. + * @li Operators HcomSend and HcomReceive have the same "sr_tag". + * @see HcomReceive +*/ +REG_OP(HcomSend) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) + .REQUIRED_ATTR(group, String) + .REQUIRED_ATTR(sr_tag, Int) + .REQUIRED_ATTR(dest_rank, Int) + .ATTR(alpha, Float, 1.0) + .ATTR(beta, Float, 0.0) + .OP_END_FACTORY_REG(HcomSend) + +/** + * @brief Receives the tensor from source rank. + * @par Inputs: + * None. + * @par Attributes: + * @li sr_tag: A required integer identifying the send/recv message tag. The + * message will be sent by the HcomSend op with the same "sr_tag". + * @li src_rank: A required integer identifying the source rank. + * @li group: A required string identifying the group name of ranks + * participating in the op. + * @li shape: A required list identifying the shape of the tensor to be + * received. + * @li dtype: A required integer identifying the type of the tensor to be + * received. The supported types are: int8, int32, float16, float32. + * @par Outputs: + * y: A tensor with type identified in "dtype". + * @attention Constraints:\n + * @li "group" is limited to 128 characters. Use + * "hccl_world_group" as the name of a world group. + * @li Operators HcomSend and HcomReceive have the same "sr_tag". + * @li "shape" should be the same as the input tensor of HcomSend. + * @li "dtype" should be the same as the input tensor of HcomSend. + * @see HcomSend +*/ +REG_OP(HcomReceive) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_FLOAT16})) + .REQUIRED_ATTR(group, String) + .REQUIRED_ATTR(sr_tag, Int) + .REQUIRED_ATTR(src_rank, Int) + .REQUIRED_ATTR(shape, ListInt) + .REQUIRED_ATTR(dtype, Type) + .ATTR(alpha, Float, 1.0) + .ATTR(beta, Float, 0.0) + .OP_END_FACTORY_REG(HcomReceive) + +} // namespace ge +#endif // GE_OP_HCOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/logging_ops.h b/third_party/fwkacllib/inc/ops/logging_ops.h index 0dc0bd24..cf012dba 100644 --- a/third_party/fwkacllib/inc/ops/logging_ops.h +++ b/third_party/fwkacllib/inc/ops/logging_ops.h @@ -34,6 +34,11 @@ REG_OP(Assert) .ATTR(summarize, Int, 3) .OP_END_FACTORY_REG(Assert) +REG_OP(Print) +.DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, + DT_INT64, DT_UINT32, DT_UINT64, DT_DOUBLE, DT_STRING})) +.OP_END_FACTORY_REG(Print) + REG_OP(PrintV2) .INPUT(x, TensorType({DT_STRING})) .ATTR(output_stream, String, "stderr") diff --git a/third_party/fwkacllib/inc/ops/lookup_ops.h b/third_party/fwkacllib/inc/ops/lookup_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h old mode 100755 new mode 100644 index 53371b31..e11922c0 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -316,21 +316,33 @@ REG_OP(ScatterUpdate) .OP_END_FACTORY_REG(ScatterUpdate) /** -*@brief Update relevant entries in '*var' according to the Ftrl-proximal scheme.
- -*@par Inputs: -* Four inputs, including: -*@li var: An NCHW, NHWC, or ND Tensor of type float32. -*@li accum: An NCHW, NHWC, or ND Tensor of type float32. -*@li grad: An NCHW, NHWC, or ND Tensor of type float32. -*@li indices: An NCHW, NHWC, or ND Tensor of type int32. - -*@par Attributes: -*@li lr: Required, used for computation. -*@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. - -*@par Outputs: -*var: A Tensor. Has the same type and format as input "var". +* @brief Update relevant entries in '*var' according to the Ftrl-proximal scheme. +* That is, for rows we have grad for, we update var, accum and linear. + +* @par Inputs: +* Ten inputs, including: +* @li var: A mutable Tensor. Must be of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li linear: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li grad: A Tensor of the same type as "var", for the gradient. +* @li indices: A vector of indices into the first dimension of var and accum. +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar. +* @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar. +* @li l2_shrinkage: A Tensor of the same type as "var", for L2 shrinkage regularization. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. + +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" and "accum" tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention. + +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var". */ REG_OP(SparseApplyFtrlV2) .INPUT(var, TensorType({DT_FLOAT})) @@ -347,6 +359,35 @@ REG_OP(SparseApplyFtrlV2) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(SparseApplyFtrlV2) +/** +* @brief Update relevant entries in '*var' according to the Ftrl-proximal scheme. +* That is, for rows we have grad for, we update var, accum and linear. + +* @par Inputs: +* Five inputs, including: +* @li var: A mutable Tensor. Must be of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li linear: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li grad: A Tensor of the same type as "var", for the gradient. +* @li indices: A vector of indices into the first dimension of var and accum. + +* @par Attributes: +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var", for L1 regularization. Must be a scalar. +* @li l2: A Tensor of the same type as "var", for L2 regularization. Must be a scalar. +* @li l2_shrinkage: A Tensor of the same type as "var", for L2 shrinkage regularization. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" and "accum" tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention. + +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var".
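+
+* For reference, a scalar sketch of the conventional Ftrl-proximal update
+* with L2 shrinkage that this op family follows (assumed to match the
+* TensorFlow operator of the same name; illustrative only, with the
+* variables corresponding to the inputs above):
+* @code
+* #include <cmath>
+* float gs = grad + 2.0f * l2_shrinkage * var;  // shrinkage-adjusted gradient
+* float accum_new = accum + grad * grad;        // accumulator uses raw grad
+* linear += gs - (std::pow(accum_new, -lr_power) -
+*                 std::pow(accum, -lr_power)) / lr * var;
+* float quadratic = std::pow(accum_new, -lr_power) / lr + 2.0f * l2;
+* var = (std::fabs(linear) > l1)
+*           ? (std::copysign(l1, linear) - linear) / quadratic
+*           : 0.0f;
+* accum = accum_new;
+* @endcode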
+*/ REG_OP(SparseApplyFtrlV2D) .INPUT(var, TensorType({DT_FLOAT})) .INPUT(accum, TensorType({DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/mvn_ops.h b/third_party/fwkacllib/inc/ops/mvn_ops.h deleted file mode 100755 index c612c45c..00000000 --- a/third_party/fwkacllib/inc/ops/mvn_ops.h +++ /dev/null @@ -1,51 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - #ifndef GE_OP_MVN_OPS_H - #define GE_OP_MVN_OPS_H - - #include "graph/operator_reg.h" - - namespace ge { -/** -*@brief Normalizes the input. - -*@par Inputs: -* One input: -*x: An NCHW tensor of type float16 or float32. - -*@par Attributes: -*@li normalize_variance: An optional bool specifying whether to normalize the variance, either "true" (default) or "false". -*@li across_channels: An optional bool specifying whether to perform across-channel MVN, either "true" or "false" (default). -*@li eps: An optional float32 epsilon for not dividing by zero. Defaults to "1e-9". - -*@par Outputs: -*y: An NCHW tensor of type float16 or float32. - -*@attention Constraints:\n -* The input tensor must have the NCHW format, whose shape length must be 4. -*/ - - REG_OP(MVN) - .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) /* "First operand." */ - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) /* "Result, has same element type as inputs" */ - .ATTR(normalize_variance, Bool, true) - .ATTR(across_channels, Bool, false) - .ATTR(eps, Float, 1e-9) - .OP_END_FACTORY_REG(MVN) - } // namespace ge - - #endif // GE_OP_MVN_OPS_H diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h old mode 100755 new mode 100644 index bdfb9f4c..fca85035 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -21,6 +21,35 @@ namespace ge { +/** +*@brief A fusion operator for batchnorm. + +*@par Inputs: +*Five inputs, including: +* @li x: A Tensor. Must be one of the following types: float32. +* @li scale: A Tensor. Must be one of the following types: float32. +* @li b: A Tensor. Must be one of the following types: float32. +* @li mean: A Tensor. Must be one of the following types: float32. +* @li variance: A Tensor. Must be one of the following types: float32. + +*@par Attributes: +* @li mode: An int. +* @li epsilon: A float32. +* @li momentum: A float32. +* @li is_training: A bool. +* @li is_training_fusion: A bool. +* @li moving_average_fraction: A float32. + +*@par Outputs: +*Six outputs, including: +* @li y: A Tensor. Must be one of the following types: float32. +* @li running_mean: A Tensor. Must be one of the following types: float32. +* @li running_variance: A Tensor.
Must be one of the following types: float32. +* @li save_mean: A Tensor. Must be one of the following types: float32. +* @li save_inv_variance: A Tensor. Must be one of the following types: float32. +* @li save_inv_variance1: A Tensor. Must be one of the following types: float32. + +*/ REG_OP(FusedBatchNorm) .INPUT(x, TensorType{DT_FLOAT}) .INPUT(scale, TensorType{DT_FLOAT}) @@ -41,6 +70,29 @@ REG_OP(FusedBatchNorm) .ATTR(moving_average_fraction, Float, 0.00300002098) .OP_END_FACTORY_REG(FusedBatchNorm) +/** +*@brief A fusion operator for batchnorm. + +*@par Inputs: +*Six inputs, including: +* @li dy: A Tensor. Must be one of the following types: float32. +* @li x: A Tensor. Must be one of the following types: float32. +* @li scale: A Tensor. Must be one of the following types: float32. +* @li save_mean: A Tensor. Must be one of the following types: float32. +* @li save_inv_variance: A Tensor. Must be one of the following types: float32. +* @li save_inv_variance1: A Tensor. Must be one of the following types: float32. + +*@par Attributes: +* @li epsilon: A float32. +* @li momentum: A float32. + +*@par Outputs: +*Three outputs, including: +* @li dx: A Tensor. Must be one of the following types: float32. +* @li bn_scale: A Tensor. Must be one of the following types: float32. +* @li bn_bias: A Tensor. Must be one of the following types: float32. +*/ + REG_OP(FusedBatchNormGrad) .INPUT(dy, TensorType{DT_FLOAT}) .INPUT(x, TensorType{DT_FLOAT}) diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h old mode 100755 new mode 100644 index f2a70ada..5cf56464 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -21,33 +21,47 @@ namespace ge { /** -* @brief Computes the gradients of depthwise convolution with respect to the filter. +* @brief Computes the gradients of depthwise convolution with respect to the +* filter. * @par Inputs: * Three inputs include: \n -* @li input: 4D origin shape of input tensor [N, C, H, W] or [N, H, W, C], support float16, float32, double +* @li input: 4D origin shape of input tensor [N, C, H, W] or [N, H, W, C], +* support float16, float32, double * @li filter_size: A 4D tensor of type int32, with shape [H, W, C, K] -* @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. Must be one of the following types: float16, float32, double. +* @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. Must be +* one of the following types: float16, float32, double. * @par Attributes: -* @li strides: The stride of the sliding window for height and width of input "x" of the convolution. -* Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height, stride_width, 1]. -* @li dilations: The dilation factor for each dimension of input "x". If set to k > 1, there will be k-1 skipped cells between each -* filter element on that dimension. Must be with shape [1, 1, dilation_height, dilation_width] or [1, dilation_height, dilation_width, 1]. -* @li pads: Padding added to each dimension of the input. -* @li data_format: Input data format, either "NHWC" or "NCHW". +* @li strides: An optional list or tuple. The stride of the sliding window for +* height and width of input "x" of the convolution. +* Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height, +* stride_width, 1]. +* @li dilations: An optional list or tuple.
The dilation factor for each +* dimension of input "x". If set to k > 1, there will be k-1 skipped cells +* between each filter element on that dimension. Must be with shape [1, 1, +* dilation_height, dilation_width] or [1, dilation_height, dilation_width, 1]. +* @li pads: An optional list or tuple. Padding added to each dimension of the +* input. +* @li data_format: An optional string. Input data format, either "NHWC" or +* "NCHW". * @par Outputs: -* filter_grad: Gradient of the deep convolution relative to the filter with shape [H, W, C, K]. Must be one of the following types: float16, float32, double. +* filter_grad: Gradient of the deep convolution relative to the filter with +* shape [H, W, C, K]. Must be one of the following types: float16, float32, +* double. * @attention Constraints:\n * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but * the data is 5D with shape [N, C1, Hi, Wi, C0], where C0 is 16.\n -* The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape [C1, Hf, Wf, K, Co, C0], +* The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape +* [C1, Hf, Wf, K, Co, C0], * where K is fixed at 1, and Co and C0 are 16.\n -* Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the data is 5D with shape [N, C1, Ho, Wo, C0], +* Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the +* data is 5D with shape [N, C1, Ho, Wo, C0], * where C is the same as that of the feature map and C0 is 16.\n -* Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * stride_h + 32 * filter_h) * ceil(Wi, 16) ≤ l1_size and Hf * Wf ≤ l0b_size/512.\n +* Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * +* stride_h + 32 * filter_h) * ceil(Wi, 16) ≤ l1_size and Hf*Wf ≤ l0b_size/512.\n */ REG_OP(DepthwiseConv2DBackpropFilter) .INPUT(input, TensorType({float16})) @@ -61,33 +75,44 @@ REG_OP(DepthwiseConv2DBackpropFilter) .OP_END_FACTORY_REG(DepthwiseConv2DBackpropFilter) /** -* @brief Computes the gradients of depthwise convolution with respect to the filter. +* @brief Computes the gradients of depthwise convolution with respect to the +* filter. * @par Inputs: * Two inputs include: \n * @li input: 4D tensor with shape [N, C, H, W] or [N, H, W, C], of type float16 -* @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C], of type float16 +* @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C], of type +* float16 * @par Attributes: -* @li filter_size: Shape of filter. -* @li strides: The stride of the sliding window for height and width of input "x" of the convolution. -* Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height, stride_width, 1]. -* @li dilations: The dilation factor for each dimension of input "x". If set to k > 1, there will be k-1 skipped cells between each -* filter element on that dimension. Must be with shape [1, 1, dilation_height, dilation_width] or [1, dilation_height, dilation_width, 1]. -* @li pads: Padding added to each dimension of the input. -* @li data_format: Input data format, either "NHWC" or "NCHW". +* @li filter_size: An optional list or tuple. Shape of filter. +* @li strides: An optional list or tuple. The stride of the sliding window for +* height and width of input "x" of the convolution. +* Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height, +* stride_width, 1]. +* @li dilations: An optional list or tuple. The dilation factor for each +* dimension of input "x". 
If set to k > 1, there will be k-1 skipped cells +* between each filter element on that dimension. Must be with shape [1, 1, +* dilation_height, dilation_width] or [1, dilation_height, dilation_width, 1]. +* @li pads: An optional list or tuple. Padding added to each dimension of the +* input. +* @li data_format: An optional string. Input data format, either "NHWC" or +* "NCHW". * @par Outputs: -* filter_grad: Gradient of the deep convolution relative to the filter with shape [H, W, C, K]. Must be of type float32. +* filter_grad: Gradient of the deep convolution relative to the filter with +* shape [H, W, C, K]. Must be of type float32. * @attention Constraints:\n * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but * the data is 5D with shape [N, C1, Hi, Wi, C0], where C0 is 16.\n -* The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape [C1, Hf, Wf, K, Co, C0], -* where K is fixed at 1, and Co and C0 are 16.\n -* Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the data is 5D with shape [N, C1, Ho, Wo, C0], -* where C is the same as that of the feature map and C0 is 16.\n -* Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * stride_h + 32 * filter_h) * ceil(Wi, 16) ≤ l1_size and Hf * Wf ≤ l0b_size/512.\n +* The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape +* [C1, Hf, Wf, K, Co, C0], where K is fixed at 1, and Co and C0 are 16.\n +* Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the +* data is 5D with shape [N, C1, Ho, Wo, C0], where C is the same as that of the +* feature map and C0 is 16.\n +* Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * +* stride_h + 32 * filter_h) * ceil(Wi, 16) ≤ l1_size and Hf*Wf ≤ l0b_size/512.\n */ REG_OP(DepthwiseConv2DBackpropFilterD) .INPUT(input, TensorType({float16})) @@ -101,33 +126,47 @@ REG_OP(DepthwiseConv2DBackpropFilterD) .OP_END_FACTORY_REG(DepthwiseConv2DBackpropFilterD) /** -* @brief Computes the gradients of depthwise convolution with respect to the input. +* @brief Computes the gradients of depthwise convolution with respect to the +* input. * @par Inputs: * Three inputs include: \n -* @li input_size: 4D shape of input tensor [N, C, H, W] or [N, H, W, C], support int32 -* @li filter: 4D filter tensor with shape of [H, W, C, K], support float16, float32, double -* @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. Must be one of the following types: float16, float32, double. +* @li input_size: 4D shape of input tensor [N, C, H, W] or [N, H, W, C], +* support int32 +* @li filter: 4D filter tensor with shape of [H, W, C, K], support float16, +* float32, double +* @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. +* Must be one of the following types: float16, float32, double. * @par Attributes: -* @li strides: The stride of the sliding window for height and width of input "x" of the convolution. -* Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height, stride_width, 1]. -* @li dilations: The dilation factor for each dimension of input "x". If set to k > 1, there will be k-1 skipped cells between each -* filter element on that dimension. Must be with shape [1, 1, dilation_height, dilation_width] or [1, dilation_height, dilation_width, 1]. -* @li pads: Padding added to each dimension of the input. -* @li data_format: Input data format, either "NHWC" or "NCHW". +* @li strides: An optional list or tuple. 
The stride of the sliding window for +* height and width of input "x" of the convolution. +* Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height, +* stride_width, 1]. +* @li dilations: An optional list or tuple. The dilation factor for each +* dimension of input "x". If set to k > 1, there will be k-1 skipped cells +* between each filter element on that dimension. Must be with shape [1, 1, +* dilation_height, dilation_width] or [1, dilation_height, dilation_width, 1]. +* @li pads: An optional list or tuple. Padding added to each dimension of the +* input. +* @li data_format: An optional string. Input data format, either "NHWC" or +* "NCHW". * @par Outputs: -* input_grad: Gradient of the deep convolution relative to the input with shape [N, C, H, W] or [N, H, W, C] Must be one of the following types: float16, float32, double. +* input_grad: Gradient of the deep convolution relative to the input with shape +* [N, C, H, W] or [N, H, W, C] Must be one of the following types: float16, +* float32, double. * @attention Constraints:\n * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but * the data is 5D with shape [N, C1, Hi, Wi, C0], where C0 is 16.\n -* The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape [C1, Hf, Wf, K, Co, C0], -* where K is fixed at 1, and Co and C0 are 16.\n -* Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the data is 5D with shape [N, C1, Ho, Wo, C0], -* where C is the same as that of the feature map and C0 is 16.\n -* Limited by Tiling: max_h_in_l1 ≥ C0, where max_h_in_l1 = (l1_size - Hf*Wf*C0*C0*2) / (2* Wo *C0).\n +* The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape +* [C1, Hf, Wf, K, Co, C0], where K is fixed at 1, and Co and C0 are 16.\n +* Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the +* data is 5D with shape [N, C1, Ho, Wo, C0], where C is the same as that of the +* feature map and C0 is 16.\n +* Limited by Tiling: max_h_in_l1 ≥ C0, where max_h_in_l1 = (l1_size - Hf * Wf * +* C0 * C0 * 2) / (2 * Wo * C0).\n */ REG_OP(DepthwiseConv2DBackpropInput) .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) @@ -141,33 +180,44 @@ REG_OP(DepthwiseConv2DBackpropInput) .OP_END_FACTORY_REG(DepthwiseConv2DBackpropInput) /** -* @brief Computes the gradients of depthwise convolution with respect to the input. +* @brief Computes the gradients of depthwise convolution with respect to the +* input. * @par Inputs: * Two inputs include: \n * @li filter: A 4D tensor of type float16, with shape [H, W, C, K] -* @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C], of type float16 +* @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C], of type +* float16 * @par Attributes: -* @li input_size: The origin shape of input. -* @li strides: The stride of the sliding window for height and width of input "x" of the convolution. -* Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height, stride_width, 1]. -* @li dilations: The dilation factor for each dimension of input "x". If set to k > 1, there will be k-1 skipped cells between each -* filter element on that dimension. Must be with shape [1, 1, dilation_height, dilation_width] or [1, dilation_height, dilation_width, 1]. -* @li pads: Padding added to each dimension of the input. -* @li data_format: Input data format, either "NHWC" or "NCHW". +* @li input_size: An optional list or tuple. The origin shape of input. +* @li strides: An optional list or tuple. 
The stride of the sliding window for +* height and width of input "x" of the convolution. +* Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height, +* stride_width, 1]. +* @li dilations: An optional list or tuple. The dilation factor for each +* dimension of input "x". If set to k > 1, there will be k-1 skipped cells +* between each filter element on that dimension. Must be with shape [1, 1, +* dilation_height, dilation_width] or [1, dilation_height, dilation_width, 1]. +* @li pads: An optional list or tuple. Padding added to each dimension of the +* input. +* @li data_format: An optional string. Input data format, either "NHWC" or +* "NCHW". * @par Outputs: -* input_grad: Gradient of the deep convolution relative to the input with shape [N, C, H, W] or [N, H, W, C]. Must be of type float16. +* input_grad: Gradient of the deep convolution relative to the input with shape +* [N, C, H, W] or [N, H, W, C]. Must be of type float16. * @attention Constraints:\n * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but * the data is 5D with shape [N, C1, Hi, Wi, C0], where C0 is 16.\n -* The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape [C1, Hf, Wf, K, Co, C0], -* where K is fixed at 1, and Co and C0 are 16.\n -* Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the data is 5D with shape [N, C1, Ho, Wo, C0], -* where C is the same as that of the feature map and C0 is 16.\n -* Limited by Tiling: max_h_in_l1 ≥ C0, where max_h_in_l1 = (l1_size - Hf*Wf*C0*C0*2) / (2* Wo *C0).\n +* The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape +* [C1, Hf, Wf, K, Co, C0], where K is fixed at 1, and Co and C0 are 16.\n +* Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the +* data is 5D with shape [N, C1, Ho, Wo, C0], where C is the same as that of the +* feature map and C0 is 16.\n +* Limited by Tiling: max_h_in_l1 ≥ C0, where max_h_in_l1 = (l1_size - Hf * Wf * +* C0 * C0 * 2) / (2 * Wo * C0).\n */ REG_OP(DepthwiseConv2DBackpropInputD) .INPUT(filter, TensorType({DT_FLOAT16})) @@ -181,7 +231,8 @@ REG_OP(DepthwiseConv2DBackpropInputD) .OP_END_FACTORY_REG(DepthwiseConv2DBackpropInputD) /** -*@brief Computes a 2D deep convolution given a 4D input tensor and a filter tensor. +*@brief Computes a 2D deep convolution given a 4D input tensor and a filter +* tensor. *@par Inputs: *Two required inputs and two optional inputs, including: \n @@ -191,13 +242,19 @@ REG_OP(DepthwiseConv2DBackpropInputD) * @li offset_w: An optional float16, used for quantized inference * @par Attributes: -* @li strides: The stride of the sliding window for height and width of input "x" of the convolution. -* Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height, stride_width, 1]. -* @li dilations: The dilation factor for each dimension of input "x". If set to k > 1, there will be k-1 skipped cells between each -* filter element on that dimension. Must be with shape [1, 1, dilation_height, dilation_width] or [1, dilation_height, dilation_width, 1]. -* @li pads: Padding added to each dimension of the input. -* @li data_format: Input data format, either "NHWC" or "NCHW". -* @li offset_a: Input offset, used for quantized inference. +* @li strides: An optional list or tuple. The stride of the sliding window for +* height and width of input "x" of the convolution. +* Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height, +* stride_width, 1]. 
+* @li dilations: An optional list or tuple. The dilation factor for each +* dimension of input "x". If set to k > 1, there will be k-1 skipped cells +* between each filter element on that dimension. Must be with shape [1, 1, +* dilation_height, dilation_width] or [1, dilation_height, dilation_width, 1]. +* @li pads: An optional list or tuple. Padding added to each dimension of the +* input. +* @li data_format: An optional string. Input data format, either "NHWC" or +* "NCHW". +* @li offset_a: An optional int. Input offset, used for quantized inference. * @par Outputs: * y: 4D tensor of type float16, with shape [N, C, H, W] or [N, H, W, C] @@ -205,10 +262,11 @@ REG_OP(DepthwiseConv2DBackpropInputD) * @attention Constraints:\n * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but * the data is 5D with shape [N, C1, Hi, Wi, C0], where C0 is 16.\n -* The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape [C1, Hf, Wf, K, Co, C0], -* where K is fixed at 1, and Co and C0 are 16.\n +* The filter is 4D with shape [Hf, Wf, C, K], but the data is 6D with shape +* [C1, Hf, Wf, K, Co, C0], where K is fixed at 1, and Co and C0 are 16.\n * Limited by the size of L1 buffer memory: \n -* (l1_size - filter_h*filter_w*BLOCK_SIZE*BLOCK_SIZE*data_size) // (Wi*BLOCK_SIZE*data_size) >= (BLOCK_SIZE*strides_h + filter_h - strides_h).\n +* (l1_size - filter_h*filter_w*BLOCK_SIZE*BLOCK_SIZE*data_size) // (Wi * +* BLOCK_SIZE * data_size) >= (BLOCK_SIZE * strides_h + filter_h - strides_h).\n */ REG_OP(DepthwiseConv2D) .INPUT(x, TensorType({DT_FLOAT16})) diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h old mode 100755 new mode 100644 index 5769f426..618dadf8 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -19,6 +19,21 @@ #include "../graph/operator_reg.h" namespace ge { + +/** +*@brief Computes the gradient for log softmax activations. + +*@par Inputs: +*@li grad: A Tensor. Must be one of the following types: float16, float32. +*@li x: A Tensor. Must be one of the following types: float16, float32. + +*@par Attributes: +* axis: An optional list of ints. Defaults to "{-1}". + +*@par Outputs: +* y: A Tensor. Has the same type as "grad". +*/ + REG_OP(LogSoftmaxGrad) .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -195,6 +210,27 @@ REG_OP(Scale) .ATTR(beta, Float, 0.0) .OP_END_FACTORY_REG(Scale) +REG_OP(SoftmaxGradExt) + .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x2, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(axis, ListInt, {-1}) + .ATTR(keep_dims, Bool, false) + .OP_END_FACTORY_REG(SoftmaxGradExt) + +/** +*@brief Fusion of the "mul", "sum" and "sub" operations used when computing the softmax gradient. + +*@par Inputs: +*Two inputs, including: +* @li grad: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. + +*@par Outputs: +* y: A Tensor of the same type as "grad".
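+*
+* A plausible NumPy reading of the fused mul/sum/sub pattern (an assumption
+* inferred from the operator name; not stated in this header):
+* y = grad - np.sum(grad * x, axis=-1, keepdims=True)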
+ +*/ REG_OP(ConfusionSoftmaxGrad) .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/nn_other_ops.h b/third_party/fwkacllib/inc/ops/nn_other_ops.h old mode 100755 new mode 100644 index 701296f8..125b21a5 --- a/third_party/fwkacllib/inc/ops/nn_other_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_other_ops.h @@ -79,6 +79,29 @@ REG_OP(HistogramFixedWidthD) .ATTR(dtype, String, "int32") .OP_END_FACTORY_REG(HistogramFixedWidthD) +/** +*@brief LayerNorm operator interface implementation +* calculating: x, gamma, beta +* mean = np.mean(x, reduce_axis, keepdims=True) +* variance = np.mean(np.power((x - mean), 2), reduce_axis, keepdims=True) +* y = gamma*((x - mean) / np.sqrt(variance + 0.001)) + beta + +*@par Inputs: +*Three inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32. +* @li beta: A Tensor. Must be one of the following types: float16, float32. + +*@par Attributes: +* @li begin_norm_axis: An optional attribute of type int32. Defaults to "0". +* @li begin_params_axis: An optional attribute of type int32. Defaults to "0". + +*@par Outputs: +*Three outputs, including: +* @li y: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +*/ REG_OP(LayerNorm) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -90,6 +113,38 @@ REG_OP(LayerNorm) .ATTR(begin_params_axis, Int, 0) .OP_END_FACTORY_REG(LayerNorm) +/** +*@brief LayerNormGrad operator interface implementation +* calculating: dy, x, variance, mean, gamma +* pd_xl = data_dy*data_gamma +* pd_var = np.sum(((-0.5)*pd_xl*(data_x - data_mean) +* np.power((data_variance + EPSLON), (-1.5))), +* reduce_axis, keepdims=True) +* pd_mean = np.sum(((-1.0)*pd_xl +* np.power((data_variance + EPSLON), (-0.5))), +* reduce_axis, keepdims=True) +* + pd_var*(1.0/m) +* np.sum(((-2.0)*(data_x - data_mean)), reduce_axis, keepdims=True) +* pd_x = pd_xl*np.power((data_variance + EPSLON), (-0.5)) + +* pd_var*(2.0/m)*(data_x - data_mean) + pd_mean*(1.0/m) +* pd_gamma = np.sum((data_dy*(data_x - data_mean) +* np.power((data_variance + EPSLON), (-0.5))), param_axis, keepdims=True) +* pd_beta = np.sum(data_dy, param_axis, keepdims=True) + +*@par Inputs: +*Five inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32. + +*@par Outputs: +*Three outputs, including: +* @li pd_x: A Tensor. Must be one of the following types: float16, float32. +* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. +* @li pd_beta: A Tensor. Must be one of the following types: float16, float32.
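+*
+* The formulas above, collected into a single NumPy-style sketch (a direct
+* transcription for readability; EPSLON is the stabilizer constant and "m"
+* is the number of elements reduced over "reduce_axis"):
+* rstd = np.power(variance + EPSLON, -0.5)
+* pd_xl = dy * gamma
+* pd_var = np.sum((-0.5) * pd_xl * (x - mean) * np.power(variance + EPSLON, -1.5), reduce_axis, keepdims=True)
+* pd_mean = np.sum((-1.0) * pd_xl * rstd, reduce_axis, keepdims=True) + pd_var * (1.0 / m) * np.sum((-2.0) * (x - mean), reduce_axis, keepdims=True)
+* pd_x = pd_xl * rstd + pd_var * (2.0 / m) * (x - mean) + pd_mean * (1.0 / m)
+* pd_gamma = np.sum(dy * (x - mean) * rstd, param_axis, keepdims=True)
+* pd_beta = np.sum(dy, param_axis, keepdims=True)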
+*/ REG_OP(LayerNormGrad) .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -101,6 +156,36 @@ REG_OP(LayerNormGrad) .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) .OP_END_FACTORY_REG(LayerNormGrad) +/** +*@brief LayerNormXBackprop operator interface implementation +* calculating: dy, x, variance, mean, gamma +* pd_xl = data_dy*data_gamma +* pd_var = np.sum(((-0.5)*pd_xl*(data_x - data_mean) +* np.power((data_variance + EPSLON), (-1.5))), +* reduce_axis, keepdims=True) +* pd_mean = np.sum(((-1.0)*pd_xl +* np.power((data_variance + EPSLON), (-0.5))), +* reduce_axis, keepdims=True) +* + pd_var*(1.0/m) +* np.sum(((-2.0)*(data_x - data_mean)), reduce_axis, keepdims=True) +* pd_x = pd_xl*np.power((data_variance + EPSLON), (-0.5)) + +* pd_var*(2.0/m)*(data_x - data_mean) + pd_mean*(1.0/m) +* pd_gamma = np.sum((data_dy*(data_x - data_mean) +* np.power((data_variance + EPSLON), (-0.5))), param_axis, keepdims=True) +* pd_beta = np.sum(data_dy, param_axis, keepdims=True) + +*@par Inputs: +*Five inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32. + +*@par Outputs: +*pd_x: A Tensor. Must be one of the following types: float16, float32. +*/ REG_OP(LayerNormXBackprop) .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -110,6 +195,36 @@ REG_OP(LayerNormXBackprop) .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) .OP_END_FACTORY_REG(LayerNormXBackprop) +/** +*@brief LayerNormBetaGammaBackprop operator interface implementation +* calculating: dy, x, variance, mean +* pd_xl = data_dy*data_gamma +* pd_var = np.sum(((-0.5)*pd_xl*(data_x - data_mean) +* np.power((data_variance + EPSLON), (-1.5))), +* reduce_axis, keepdims=True) +* pd_mean = np.sum(((-1.0)*pd_xl +* np.power((data_variance + EPSLON), (-0.5))), +* reduce_axis, keepdims=True) +* + pd_var*(1.0/m) +* np.sum(((-2.0)*(data_x - data_mean)), reduce_axis, keepdims=True) +* pd_x = pd_xl*np.power((data_variance + EPSLON), (-0.5)) + +* pd_var*(2.0/m)*(data_x - data_mean) + pd_mean*(1.0/m) +* pd_gamma = np.sum((data_dy*(data_x - data_mean) +* np.power((data_variance + EPSLON), (-0.5))), param_axis, keepdims=True) +* pd_beta = np.sum(data_dy, param_axis, keepdims=True) + +*@par Inputs: +*Four inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32. + +*@par Outputs: +*Two outputs, including: +* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. +* @li pd_beta: A Tensor. Must be one of the following types: float16, float32.
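+*
+* Only the two parameter gradients are produced here; as a NumPy-style
+* sketch (transcribed from the formulas above, with EPSLON as the assumed
+* stabilizer):
+* pd_gamma = np.sum(dy * (x - mean) * np.power(variance + EPSLON, -0.5), param_axis, keepdims=True)
+* pd_beta = np.sum(dy, param_axis, keepdims=True)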
+*/ REG_OP(LayerNormBetaGammaBackprop) .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -120,6 +235,27 @@ REG_OP(LayerNormBetaGammaBackprop) .REQUIRED_ATTR(shape_gamma, ListInt) .OP_END_FACTORY_REG(LayerNormBetaGammaBackprop) +/** +*@brief Return "output" according to the algorithm of dropout_do_mask: \n +* scale_x = x * (1 / keep_prob) +* output = select(mask == 1, scale_x, 0) + +*@par Inputs: +*Three inputs, including: \n +* @li x: A mutable Tensor. Must be one of the following types: +* float16, float32 +* @li mask: A mutable Tensor. Must meet all of the following rules: +* the shape of "mask" should be 1D. +* the dtype of "mask" should be uint8. +* the value of the shape should satisfy the following formula: +* value = (size(x) + 128 - 1) // 128 * 128 // 8 +* @li keep_prob: A mutable Tensor. Must meet all of the following rules: +* the shape of "keep_prob" should be (1,) or [1,]. +* Has the same type as "x". + +*@par Outputs: +*y: A mutable Tensor. Has the same type as "x". +*/ REG_OP(DropOutDoMask) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) .INPUT(mask, TensorType({DT_UINT8})) diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index 905c7d5d..e1fb8558 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -118,14 +118,14 @@ REG_OP(MaxPool) * @li grad: A mutable NC1HWC0 tensor of type RealNumberType. * @par Attributes: -* @li ksize: A tuple or list, specifying the size of the window for each -* dimension of the input tensor. -* @li strides: A tuple or list, specifying the stride of the sliding window for +* @li ksize: A required tuple or list, specifying the size of the window for * each dimension of the input tensor. -* @li padding: A string, specifying the type of padding algorithm to use. +* @li strides: A required tuple or list, specifying the stride of the sliding +* window for each dimension of the input tensor. +* @li padding: A required string, specifying the type of padding algorithm to use. * @par Outputs: -* y: A mutable tensor. Has the same shape and type as "x1. +* y: A mutable tensor. Has the same shape and type as "x1". * @attention Constraints: * @li Computing gradients of global pooling is not supported, which means @@ -151,14 +151,16 @@ REG_OP(MaxPoolGrad) * @li grad: Gradient tensor of type float16 * @par Attributes: -* @li ksize: A required list, specifying the size of the sliding window. -* @li strides: A required list, specifying the stride of the sliding window. -* @li padding: window sliding mode. Either SAME or VALID. -* @li data_format: Format of the original input, either NCHW or NHWC. Defaults +* @li ksize: A required list or tuple, specifying the size of the sliding window. +* @li strides: A required list or tuple, +* specifying the stride of the sliding window. +* @li padding: A required string, specifying the window sliding mode. Either SAME or VALID. +* @li data_format: An optional string. Format of the original input, +* either NCHW or NHWC. Defaults * to NHWC. * @attention Constraints: -* @li Only the cloud platform is supported. +* @li Only the Ascend 910 platform is supported. * @li "x1" and "grads" must have the same shape. * @li "x2" and "y" must have the same shape. Otherwise, an error is reported. * @li "x1", "x2", "grads", and "y" must be 5D tensors. @@ -232,13 +234,13 @@ REG_OP(MaxPoolGradWithArgmax) * @brief Computes second-order gradients of the maxpooling function.
* @par Inputs: -* @li x:Original forward input tensor of type float16 -* @li grad:Gradient tensor of type float16 -* @li argmax:An tensor of type uint16 +* @li x: Original forward input tensor of type float16 +* @li grad: Gradient tensor of type float16 +* @li argmax: A tensor of type uint16 * @par Attributes: * @li ksize: A required list, specifying the size of the sliding window. * @li strides: A required list, specifying the stride of the sliding window. -* @li padding: window sliding mode. Either SAME or VALID. +* @li padding: A required string, specifying the window sliding mode. Either SAME or VALID. * @par Outputs: * @li y:Result tensor of type float16 @@ -269,12 +271,13 @@ REG_OP(MaxPoolGradGradWithArgmax) * @li input_grad: An NHWC tensor of type float16, float32, or double. * @par Attributes: -* @li ksize: A tuple or list, specifying the size of the window for each -* dimension of the input tensor. -* @li strides: A tuple or list, specifying the stride of the sliding window for\n +* @li ksize: A required tuple or list, specifying the size of the window for * each dimension of the input tensor. -* @li padding: A string, specifying the type of the padding algorithm to use. -* @li data_format: A string. Defaults to "NHWC". +* @li strides: A required tuple or list, specifying the stride of the sliding +* window for each dimension of the input tensor. +* @li padding: A required string, specifying the type of the padding algorithm +* to use. +* @li data_format: An optional string. Defaults to "NHWC". * @par Outputs: * @out_grad: A mutable tensor with the same shape and type as "orig_input". @@ -297,12 +300,13 @@ REG_OP(AvgPoolGrad) * @par Attributes: * @li orig_input_shape: Original input dimensions. -* @li ksize: A tuple or list, specifying the size of the window for each -* dimension of the input tensor. -* @li strides: A tuple or list, specifying the stride of the sliding window for\n +* @li ksize: A required tuple or list, specifying the size of the window for * each dimension of the input tensor. -* @li padding: A string, specifying the type of the padding algorithm to use. -* @li data_format: A string. Defaults to "NHWC". +* @li strides: A required tuple or list, specifying the stride of the sliding +* window for each dimension of the input tensor. +* @li padding: A required string, specifying the type of the padding algorithm +* to use. +* @li data_format: An optional string. Defaults to "NHWC". * @par Outputs: * @out_grad: A mutable tensor with the same shape and type as "orig_input".
@@ -348,12 +352,6 @@ REG_OP(MaxPoolGradWithArgmaxCCE) .ATTR(nan_opt, Int, 0) .OP_END_FACTORY_REG(MaxPoolGradWithArgmaxCCE) -REG_OP(Upsample) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) - .ATTR(scale, Float, 1) - .ATTR(stride, Int, 2) - .OP_END_FACTORY_REG(Upsample) } // namespace ge #endif // GE_OP_NN_POOLING_OPS_H diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index ed43fb02..63fd59d6 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -480,12 +480,6 @@ REG_OP(SGD) * mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n * var <- var - mom\n * -* @attention Constraints: -* @li Note that in dense implementation of this algorithm, "ms" and "mom" will\n -* update even if "grad" is 0, but in this sparse implementation, "ms" and "mom"\n -* will not update in iterations during which "grad" is 0.\n -* @li The input tensors "var", "ms", "mom" and "grad" must have the same shape. -* * @par Inputs: * @li var: A mutable tensor. Must be one of the data types defined in\n * TensorType::NumberType(). Should be from a Variable(). @@ -506,6 +500,12 @@ REG_OP(SGD) * * @par Outputs: * var: A mutable tensor. Has the same type as input "var". +* +* @attention Constraints: +* @li Note that in dense implementation of this algorithm, "ms" and "mom" will\n +* update even if "grad" is 0, but in this sparse implementation, "ms" and "mom"\n +* will not update in iterations during which "grad" is 0.\n +* @li The input tensors "var", "ms", "mom" and "grad" must have the same shape. */ REG_OP(ApplyRMSProp) .INPUT(var, TensorType::NumberType()) @@ -529,12 +529,6 @@ REG_OP(ApplyRMSProp) * mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\n * var <- var - mom * -* @attention Constraints: -* @li Note that in dense implementation of this algorithm, "ms" and "mom" will\n -* update even if "grad" is 0, but in this sparse implementation, "ms" and "mom"\n -* will not update in iterations during which "grad" is 0. -* @li The input tensors "var", "ms", "mom" and "grad" must have the same shape. -* * @par Inputs: * @li var: A mutable tensor. Must be one of the data types defined in\n * TensorType::NumberType(). Should be from a Variable(). @@ -549,12 +543,17 @@ REG_OP(ApplyRMSProp) * @li use_locking: An optional "bool". Defaults to "False". If "True", updating\n * of the "var", "ms", and "mom" tensors will be protected by a lock; otherwise * the behavior is undefined, but may exhibit less contention. -* @li rho: A scalar. Must have the same type as "var". -* @li momentum: A scalar. Must have the same type as "var". -* @li epsilon: A scalar. Must have the same type as "var". +* @li rho: A required scalar. Must have the same type as "var". +* @li momentum: A required scalar. Must have the same type as "var". +* @li epsilon: A required scalar. Must have the same type as "var". * * @par Outputs: * var: A mutable tensor. Must have the same type as input "var". +* @attention Constraints: +* @li Note that in dense implementation of this algorithm, "ms" and "mom" will\n +* update even if "grad" is 0, but in this sparse implementation, "ms" and "mom"\n +* will not update in iterations during which "grad" is 0. +* @li The input tensors "var", "ms", "mom" and "grad" must have the same shape. 
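+*
+* For reference, the update rule above as a NumPy-style sketch (an
+* illustrative transcription, not an authoritative implementation):
+* ms = rho * ms + (1.0 - rho) * grad * grad
+* mom = momentum * mom + lr * grad / np.sqrt(ms + epsilon)
+* var = var - mom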
*/ REG_OP(ApplyRMSPropD) .INPUT(var, TensorType::NumberType()) @@ -769,6 +768,8 @@ REG_OP(ApplyAdam) .INPUT(epsilon, TensorType::NumberType()) .INPUT(grad, TensorType::NumberType()) .OUTPUT(var, TensorType::NumberType()) + .OUTPUT(m, TensorType::NumberType()) + .OUTPUT(v, TensorType::NumberType()) .ATTR(use_locking, Bool, false) .ATTR(use_nesterov, Bool, false) .OP_END_FACTORY_REG(ApplyAdam) @@ -810,6 +811,37 @@ REG_OP(ApplyAdadelta) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ApplyAdadelta) +/** +*@brief Updates "var" according to the ApplyMomentum algorithm. \n +* accum = accum * momentum + x1 * x2 +* if use_nesterov is True: +* var -= x1 * x2 * lr + accum * momentum * lr +* else: +* var -= accum * lr + +*@par Inputs: +*Six inputs, including: +* @li var: A mutable Tensor of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li lr: A scalar of the same type as "var", for the scaling factor. +* @li x1: A Tensor of type TensorType::NumberType(). +* @li momentum: A scalar of the same type as "var". +* @li x2: A Tensor of the same type as "var". + +*@par Attributes: +*Two attributes, including: +*@li use_nesterov: An optional bool. Defaults to "False". \n +* If True, the tensor passed to compute grad will be var - lr * momentum * accum, \n +* so in the end, the var you get is actually var - lr * momentum * accum. +*@li use_locking: An optional bool. Defaults to "False". \n +* If "True", updating of the "var" and "accum" tensors will be protected \n +* by a lock; otherwise the behavior is undefined, but may exhibit less contention. + +*@par Outputs: +*var: A mutable Tensor. Has the same type as "var". +*/ REG_OP(FusedMulApplyMomentum) .INPUT(var, TensorType::NumberType()) .INPUT(accum, TensorType::NumberType()) @@ -822,6 +854,39 @@ REG_OP(FusedMulApplyMomentum) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(FusedMulApplyMomentum) +/** +*@brief Updates "var" according to the ApplyMomentum algorithm. \n +* accum = accum * momentum + x1 * x2 +* if use_nesterov is True: +* var -= x1 * x2 * lr + accum * momentum * lr +* else: +* var -= accum * lr + +*@par Inputs: +*Six inputs, including: +* @li var: A mutable Tensor of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li lr: A scalar of the same type as "var", for the scaling factor. +* @li x1: A Tensor of type TensorType::NumberType(). +* @li momentum: A scalar of the same type as "var". +* @li x2: A Tensor of the same type as "var". + +*@par Attributes: +*Two attributes, including: +*@li use_nesterov: An optional bool. Defaults to "False". \n +* If True, the tensor passed to compute grad will be var - lr * momentum * accum, \n +* so in the end, the var you get is actually var - lr * momentum * accum. +*@li use_locking: An optional bool. Defaults to "False". \n +* If "True", updating of the "var" and "accum" tensors will be protected \n +* by a lock; otherwise the behavior is undefined, but may exhibit less contention. + +*@par Outputs: +*Two outputs, including: +*@li var: A Tensor. Has the same type as "var". +*@li var_copy: A Tensor. Has the same type as "var".
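+*
+* The update above as a NumPy-style sketch (a transcription of this
+* comment's formulas; the handling of "var_copy" is not shown):
+* accum = accum * momentum + x1 * x2
+* if use_nesterov:
+*     var -= x1 * x2 * lr + accum * momentum * lr
+* else:
+*     var -= accum * lr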
+*/ REG_OP(FusedMulApplyMomentumExtern) .INPUT(var, TensorType::NumberType()) .INPUT(accum, TensorType::NumberType()) @@ -836,6 +901,26 @@ REG_OP(FusedMulApplyMomentumExtern) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(FusedMulApplyMomentumExtern) +/** +*@brief Updates "g" according to the LARS algorithm. + +*@par Inputs: +*Four inputs, including: +* @li w: A Tensor. Must be of type TensorType::DT_FLOAT. +* @li g: A Tensor of the same type and shape as "w". +* @li weight_decay: A Tensor of the same type as "w". Must be a scalar. +* @li learning_rate: A Tensor of the same type as "w". Must be a scalar. + +*@par Attributes: +*Three attributes, including: +* @li hyperpara: An optional float. Defaults to "0.001". +* @li epsilon: An optional float. Defaults to "1e-5", which keeps the denominator from being 0. +* @li use_clip: An optional bool. Defaults to "False".\n +* If "True", the learning rate is clipped during the update. + +*@par Outputs: +*g_new: Tensor of the same type as "w". +*/ REG_OP(LarsV2) .INPUT(w, TensorType(DT_FLOAT)) .INPUT(g, TensorType(DT_FLOAT)) @@ -847,6 +932,28 @@ REG_OP(LarsV2) .ATTR(use_clip, Bool, false) .OP_END_FACTORY_REG(LarsV2) +/** +*@brief Updates "g" according to the LARS algorithm. + +*@par Inputs: +*Six inputs, including: +* @li w: A Tensor. Must be of type TensorType::DT_FLOAT. +* @li g: A Tensor of the same type and shape as "w". +* @li w_square_sum: A Tensor holding square_sum(w). Has the same type as "w" and must be a scalar. +* @li g_square_sum: A Tensor holding square_sum(g). Has the same type as "w" and must be a scalar. +* @li weight_decay: A Tensor of the same type as "w". Must be a scalar. +* @li learning_rate: A Tensor of the same type as "w". Must be a scalar. + +*@par Attributes: +*Three attributes, including: +* @li hyperpara: An optional float. Defaults to "0.001". +* @li epsilon: An optional float. Defaults to "1e-5", which keeps the denominator from being 0. +* @li use_clip: An optional bool. Defaults to "False".\n +* If "True", the learning rate is clipped during the update. + +*@par Outputs: +*g_new: Tensor of the same type as "w". +*/ REG_OP(LarsV2Update) .INPUT(w, TensorType(DT_FLOAT)) .INPUT(g, TensorType(DT_FLOAT)) diff --git a/third_party/fwkacllib/inc/ops/no_op.h b/third_party/fwkacllib/inc/ops/no_op.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index 8c1985d1..42ab1a4c 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -114,14 +114,14 @@ REG_OP(Relu6) /** * @brief Computes rectified linear 6 gradients for a Relu6 operation. -* z = dy * (y > 0) * (y < 6). +* backprops = gradients * (features > 0) * (features < 6). * @par Inputs: -* @li y: A Tensor of type RealNumberType. -* @li dy: A Tensor of type RealNumberType. +* @li features: A Tensor of type RealNumberType. +* @li gradients: A Tensor of type RealNumberType. * @par Outputs: -* z: A Tensor of type RealNumberType. +* backprops: A Tensor of type RealNumberType. */ REG_OP(Relu6Grad) .INPUT(gradients, TensorType::RealNumberType()) @@ -139,9 +139,7 @@ REG_OP(Relu6Grad) * A Tensor. Has the same type as "x". * @attention Constraints: -* @li "x" is with shape (D1, D2, ..., DK), where, D1 * D2... * Dn <= 2^31-1, -* Di <= 1000000, n <= 8. -* @li Ascend 310 provides only 1?? accuracy for the result. +* @li Ascend 310 provides only 1e-3 accuracy for the result.
* @see Relu() */ @@ -306,12 +304,6 @@ REG_OP(EluGrad) .OUTPUT(y, TensorType::FloatingDataType()) .OP_END_FACTORY_REG(EluGrad) -REG_OP(LeakyRelu) - .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) - .ATTR(negative_slope, Float, 0.0) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) - .OP_END_FACTORY_REG(LeakyRelu) - } // namespace ge #endif // GE_OP_NONLINEAR_FUC_OPS_H diff --git a/third_party/fwkacllib/inc/ops/normalize_ops.h b/third_party/fwkacllib/inc/ops/normalize_ops.h deleted file mode 100644 index 3083c1a8..00000000 --- a/third_party/fwkacllib/inc/ops/normalize_ops.h +++ /dev/null @@ -1,51 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - #ifndef GE_OP_NORMALIZE_OPS_H - #define GE_OP_NORMALIZE_OPS_H - - #include "graph/operator_reg.h" - - namespace ge { - - REG_OP(Normalize) - .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) - .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) - .ATTR(across_spatial, Bool, true) - .ATTR(channel_shared, Bool, true) - .ATTR(eps, Float, 1e-10) - .OP_END_FACTORY_REG(Normalize); - - REG_OP(NormalizeSum) - .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) - .ATTR(across_spatial, Bool, true) - .OP_END_FACTORY_REG(NormalizeSum); - - REG_OP(NormalizeScale) - .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) - .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) - .INPUT(x3, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) - .ATTR(across_spatial, Bool, true) - .ATTR(channel_shared, Bool, true) - .ATTR(eps, Float, 1e-10) - .OP_END_FACTORY_REG(NormalizeScale); - - } // namespace ge - - #endif // GE_OP_NORMALIZE_OPS_H diff --git a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/outfeed_ops.h b/third_party/fwkacllib/inc/ops/outfeed_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index dce1d906..5c0a1ce0 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -111,12 +111,41 @@ REG_OP(BroadcastToD) .REQUIRED_ATTR(shape, ListInt) .OP_END_FACTORY_REG(BroadcastToD) +/** +*@brief Pads a tensor. + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, +* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, uint32, uint64. +* @li paddings: A Tensor of type int32 or int64. + +*@par Outputs: +*y: A Tensor of the same type as "x". 
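+*
+* For reference, the behavior corresponds to NumPy-style constant padding
+* (a sketch assuming zero fill; "paddings" has shape [rank(x), 2]):
+* y = np.pad(x, paddings, mode='constant', constant_values=0)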
+*/ REG_OP(Pad) .INPUT(x, TensorType::BasicType()) .INPUT(paddings, TensorType::IndexNumberType()) .OUTPUT(y, TensorType::BasicType()) .OP_END_FACTORY_REG(Pad) +/** +*@brief Pads a tensor. + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32. + +*@par Attributes: +*paddings: An optional "vector<vector<int64_t>>". Defaults to "{}". +* For each dimension D of input, paddings[D, 0] indicates how many +* values to add before the contents of tensor in that dimension, +* and paddings[D, 1] indicates how many values to add after the +* contents of tensor in that dimension. + +*@par Outputs: +*y: A Tensor of the same type as "x". +*/ REG_OP(PadD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/power_ops.h b/third_party/fwkacllib/inc/ops/power_ops.h deleted file mode 100644 index e2b5656d..00000000 --- a/third_party/fwkacllib/inc/ops/power_ops.h +++ /dev/null @@ -1,34 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - #ifndef GE_OP_POWER_H - #define GE_OP_POWER_H - - #include "../graph/operator_reg.h" - - namespace ge { - - REG_OP(Power) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) - .ATTR(power, Float, 1.0) - .ATTR(scale, Float, 1.0) - .ATTR(shift, Float, 0.0) - .OP_END_FACTORY_REG(Power); - - } // namespace ge - - #endif // GE_OP_POWER_H diff --git a/third_party/fwkacllib/inc/ops/prior_box.h b/third_party/fwkacllib/inc/ops/prior_box.h deleted file mode 100644 index f0c275f2..00000000 --- a/third_party/fwkacllib/inc/ops/prior_box.h +++ /dev/null @@ -1,129 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - #ifndef GE_OP_PRIORBOX_H - #define GE_OP_PRIORBOX_H - - #include "graph/operator_reg.h" - - namespace ge { -/** -*@brief Performs SSD prior box detection. - -*@par Inputs: -* Two inputs, including: -*@li feature: An NC1HWC0 or NCHW feature map of type is float32 or float16. -*@li img: source image. Has the same type and format as "feature". - -*@par Attributes: -*@li min_size: A required float32, specifying the minimum edge length of a square prior box.
-*@li max_size: A required float32, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size) -*@li aspect_ratio: An optional float32, specifying the aspect ratio for generated rectangle boxes. The height is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaults to "1.0". -*@li img_size: An optional int32, specifying the source image size. Defaults to "0". -*@li img_h: An optional int32, specifying the source image height. Defaults to "0". -*@li img_w: An optional int32, specifying the source image width. Defaults to "0". -*@li step: An optional float32, specifying the step for mapping the center point from the feature map to the source image. Defaults to "0.0". -*@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image. Defaults to "0.0". -*@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image. Defaults to "0.0". -*@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True". -*@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False". -*@li offset: An optional float32, specifying the offset. Defaults to "0.5". -*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value). - -*@par Outputs: -*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance. - -*@attention Constraints:\n -* This operator applies only to SSD networks. -*@see SSDDetectionOutput() -*/ - REG_OP(PriorBox) - .INPUT(feature, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) - .REQUIRED_ATTR(min_size, ListFloat) - .REQUIRED_ATTR(max_size, ListFloat) - .ATTR(aspect_ratio, ListFloat, {1.0}) - .ATTR(img_size, Int, 0) - .ATTR(img_h, Int, 0) - .ATTR(img_w, Int, 0) - .ATTR(step, Float, 0.0) - .ATTR(step_h, Float, 0.0) - .ATTR(step_w, Float, 0.0) - .ATTR(flip, Bool, true) - .ATTR(clip, Bool, false) - .ATTR(offset, Float, 0.5) - .ATTR(variance, ListFloat, {0.1}) - .OP_END_FACTORY_REG(PriorBox); - -/** -*@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox. - -*@par Inputs: -* Six inputs, including: -*@li feature: An NC1HWC0 or NCHW feature map of type is float32 or float16. -*@li img: source image. Has the same type and format as "feature". -*@li data_h: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map height. -*@li data_w: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map width. -*@li box_height: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the height of each prior box. -*@li box_width: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the width of each prior box. - -*@par Attributes: -*@li min_size: A required float32, specifying the minimum edge length of a square prior box. -*@li max_size: A required float32, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size) -*@li img_size: An optional int32, specifying the size of the source image. -*@li img_h: An optional int32, specifying the height of the source image. -*@li img_w: An optional int32, specifying the width of the source image. 
-*@li step: An optional float32, specifying the step for mapping the center point from the feature map to the source image. -*@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image. -*@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image. -*@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True". -*@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False". -*@li offset: An optional float32, specifying the offset. Defaults to "0.5". -*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value). - -*@par Outputs: -*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance. - -*@attention Constraints:\n -* This operator applies only to SSD networks. -*@see SSDDetectionOutput() -*/ - REG_OP(PriorBoxD) - .INPUT(feature, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) - .REQUIRED_ATTR(min_size, ListFloat) - .REQUIRED_ATTR(max_size, ListFloat) - .ATTR(img_size, Int, 0) - .ATTR(img_h, Int, 0) - .ATTR(img_w, Int, 0) - .ATTR(step, Float, 0.0) - .ATTR(step_h, Float, 0.0) - .ATTR(step_w, Float, 0.0) - .ATTR(flip, Bool, true) - .ATTR(clip, Bool, false) - .ATTR(offset, Float, 0.5) - .ATTR(variance, ListFloat, {0.1}) - .OP_END_FACTORY_REG(PriorBoxD); - - } // namespace ge - - #endif // GE_OP_PRIORBOX_H diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h old mode 100755 new mode 100644 index fe9643f4..d9cdc1fc --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -119,7 +119,22 @@ REG_OP(DropOutGenMask) .ATTR(seed2, Int, 0) .OP_END_FACTORY_REG(DropOutGenMask) +/** +*@brief Generates values in an interval. + +*@par Inputs:\n +* Four ND inputs, including: +*@li input_assist: A 1D Tensor of type float32. +*@li input_start: A 1D Tensor of type float32, for the first entry in the range. +*@li input_stop: A 1D Tensor of type float32, for the last entry in the range. +*@li input_num: A 1D Tensor of type int32, for the number of entries in the range. + +*@par Outputs:\n +*output_op: A 1D Tensor of type float32. +*@attention Constraints:\n +* "input_assist" is a sequence of "input_num" evenly-spaced values beginning at 0 with a common difference of 1.
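+*
+* A NumPy-style sketch of the expected result (an assumption based on the
+* standard linspace definition; "assist" is the arange described above):
+* assist = np.arange(num, dtype=np.float32)
+* output = start + assist * (stop - start) / max(num - 1, 1)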
+*/ REG_OP(LinSpaceD) .INPUT(assist, TensorType({DT_FLOAT})) .INPUT(start, TensorType({DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index a74ec28c..daf82c51 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -55,6 +55,16 @@ REG_OP(BNTrainingUpdate) .OUTPUT(batch_variance, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(BNTrainingUpdate) +REG_OP(BNInfer) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(offset, TensorType({DT_FLOAT})) + .INPUT(mean, TensorType({DT_FLOAT})) + .INPUT(variance, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(epsilon, Float) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OP_END_FACTORY_REG(BNInfer) + REG_OP(BNTrainingUpdateV2) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(sum, TensorType({DT_FLOAT})) @@ -212,6 +222,23 @@ REG_OP(ReduceMax) .ATTR(keep_dims, Bool, false) .OP_END_FACTORY_REG(ReduceMax) +/** +*@brief Returns the maximum of elements across dimensions of a Tensor. + +*@par Inputs: +*x: A multi-dimensional Tensor of type float16, float32, int32, int8, or uint8. + +*@par Attributes: +* Two attributes, including: \n +*@li axis: A required ListInt, specifying the axes along which to reduce. +*@li keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false". + +*@par Outputs: +*y: A multi-dimensional Tensor, specifying the maximum value of the corresponding axis in the tensor. Has the same type as "x". (If "keep_dims" is set to "false", the reduced axes are removed from the output shape; otherwise, each reduced axis is retained with length 1.) + +*@attention Constraints: +* The value range of "axis" is [-dims, dims - 1]. "dims" indicates the dimension length of "x". +*/ REG_OP(ReduceMaxD) .INPUT(x, TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_FLOAT16, DT_INT32})) diff --git a/third_party/fwkacllib/inc/ops/reduction_ops.h b/third_party/fwkacllib/inc/ops/reduction_ops.h deleted file mode 100755 index fb4a4b59..00000000 --- a/third_party/fwkacllib/inc/ops/reduction_ops.h +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - - #ifndef GE_OP_REDUCTION_H - #define GE_OP_REDUCTION_H - - #include "graph/operator_reg.h" - - namespace ge { - - REG_OP(Reduction) - .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) - .ATTR(operation, String, "SUM") - .ATTR(axis, Int, 0) - .ATTR(coeff, Float, 1.0) - .OP_END_FACTORY_REG(Reduction); - } // namespace ge - - #endif // GE_OP_REDUCTION_H diff --git a/third_party/fwkacllib/inc/ops/roipooling_ops.h b/third_party/fwkacllib/inc/ops/roipooling_ops.h deleted file mode 100755 index 35625815..00000000 --- a/third_party/fwkacllib/inc/ops/roipooling_ops.h +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef GE_OP_ROIPOOLING_OPS_H_ -#define GE_OP_ROIPOOLING_OPS_H_ - -#include "graph/operator_reg.h" - -namespace ge { - -REG_OP(RoiPooling) - .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(roi_actual_num, TensorType({DT_INT32})) - .ATTR(roi_max_num, Int,3008) - .REQUIRED_ATTR(pooled_h, Int) - .REQUIRED_ATTR(pooled_w, Int) - .ATTR(spatial_scale, Float, 0.0625) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) - .OP_END_FACTORY_REG(RoiPooling) - -} // namespace ge - -#endif // GE_OP_BITWISE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/rpn_ops.h b/third_party/fwkacllib/inc/ops/rpn_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/save_ops.h b/third_party/fwkacllib/inc/ops/save_ops.h index 3f44d1ce..a3b9d397 100644 --- a/third_party/fwkacllib/inc/ops/save_ops.h +++ b/third_party/fwkacllib/inc/ops/save_ops.h @@ -28,4 +28,4 @@ REG_OP(Save) } // namespace ge -#endif // GE_OP_SAVE_OPS_H_ +#endif // GE_OP_SAVE_OPS_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/scale_ops.h b/third_party/fwkacllib/inc/ops/scale_ops.h deleted file mode 100755 index 1910020b..00000000 --- a/third_party/fwkacllib/inc/ops/scale_ops.h +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - #ifndef GE_OP_SCALE_OPS_H - #define GE_OP_SCALE_OPS_H - - #include "graph/operator_reg.h" - - namespace ge { - - REG_OP(Scale) - .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16})) /* "First operand." */ - .INPUT(scale_param, TensorType({DT_FLOAT, DT_FLOAT16})) /* "Second operand." 
*/ - .INPUT(bias_param, TensorType({DT_FLOAT, DT_FLOAT16})) /* "Third operand." */ - .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) /* "Result, has same element type as input_x" */ - .ATTR(axis, Int, 1) - .ATTR(num_axes, Int, 1) - .ATTR(bias_term, Bool, false) - .ATTR(scale_from_blob, Bool, true) - .OP_END_FACTORY_REG(Scale) - - } // namespace ge - - #endif // GE_OP_SCALE_OPS_H diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 3928a7b5..5b083282 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -128,7 +128,7 @@ REG_OP(TileD) * (K-1)-dimensional tensor of "indices" into "params", where each element\n * defines a slice of "params":\n * output[\\(i_0, ..., i_{K-2}\\)] = params[indices[\\(i_0, ..., i_{K-2}\\)]]\n -* "indices" defines slices into the first N dimensions of\n +* In gather_nd, "indices" defines slices into the first N dimensions of\n * "params", where\n * N = indices.shape[-1]\n * indices = [[0, 0], [1, 1]]\n @@ -766,20 +766,16 @@ REG_OP(ScatterNdD) /** * @brief Says whether the targets are in the top "k" predictions.\n -* Let "x1" be the predictions for all classes for example i, "x2(i)" be the\n -* target class for example i, y(i) be the output for example i:\n -* y(i) = x1(i, x2(i)) ∈ TopKIncludingTies(x1(i)) * @par Inputs: * Three inputs, including: * @li x1: A 2D Tensor of type float32. A "batch_size * classes" tensor. -* @li x2: A 1D Tensor of type IndexNumberType. A "batch_size" tensor of class -* ids. +* @li x2: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids. * @li k: A 1D Tensor of the same type as "x2". * Specifies the number of top elements to look at for computing precision. * @par Outputs: -* y: A Tensor of type uint8. +* y: A Tensor of type bool. * @see InTopK() */ @@ -792,26 +788,18 @@ REG_OP(InTopKExt2) /** * @brief Says whether the targets are in the top "k" predictions\n -* Let "x1" be the predictions for all classes for example i, "x2(i)" be the\n -* target class for example i, y(i) be the output for example i:\n -* y(i) = x1(i, x2(i)) ∈ TopKIncludingTies(x1(i)) * @par Inputs: * Two inputs, including: * @li x1: A 2D Tensor of type float32. A "batch_size * classes" tensor. -* @li x2: A 1D Tensor of type IndexNumberType. A "batch_size" tensor of class -* ids. +* @li x2: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids. * @par Attributes: -* @li k: An int32, specifying the number of top elements to look at for +* @li k: An optional int32, specifying the number of top elements to look at for * computing precision. * @par Outputs: -* y: A Tensor of type uint8. - -* @attention Constraints: - -* @see InTopKEx2() +* y: A Tensor of type bool. */ REG_OP(InTopK) .INPUT(x1, TensorType({DT_FLOAT})) @@ -1295,386 +1283,6 @@ REG_OP(UnsortedSegmentProdD) .OP_END_FACTORY_REG(UnsortedSegmentProdD) /** -*@brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3. - -*@par Inputs: -*x: An NCHW tensor of type float16 or float32. The data is with shape (N, boxes*(coords+obj+classes), H, W),where, "obj" indicates the confidence of an object, and only one confidence is supported. Boxes are arranged as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn. - -*@par Attributes: -*@li boxes: A required int32, specifying the number of anchor boxes. Defaults to "5" for V2 or "3" for V3. -*@li coords: An int32, specifying the number of parameters required for locating an object.
The value is fixed at "4", corresponding to (x,y,w,h). -*@li classes: An int32, specifying the number of prediction classes. Defaults to "80". The value range is [1, 1024]. -*@li yolo_version: A string, specifying the YOLO version, either "V2" or "V3". -*@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". -*@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". - -*@par Outputs: -*@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2], where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box. -*@li obj_data: A float16 or float32 with shape [N, ceilx(boxes*height*width *2+32, 32)/2], where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence. -*@li classes_data: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width *2+32, 32)/2], where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes. - -*@attention Constraints: -*@li This operator applies to YOLO v2 and v3 networks. -*@li The succeeding layer of the Yolo operator must be operator Yolov3DetectionOutput. -*/ -REG_OP(Yolo) - .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(obj_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(classes_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .ATTR(boxes, Int, 3) - .ATTR(coords, Int, 4) - .ATTR(classes, Int, 80) - .ATTR(yolo_version, String, "V3") - .ATTR(softmax, Bool, false) - .ATTR(background, Bool, false) - .OP_END_FACTORY_REG(Yolo) - -/** -*@brief Performs YOLO V3 detection. - -*@par Inputs: -*Ten inputs, including: -*@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n -There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo. -*@li imginfo: A float16, describing the image information including the required image height and width \n -and the actual image height and width. -* -*@par Attributes: -*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" -*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. -*@li coords: Specifies the number of coordinate parameters. Must be 4. -*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80]. -*@li relative: An optional bool. Defaults to and must be "true". -*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. - -*@li post_top_k: An optional int32. This attribute is reserved. -*@li classes_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. - -*@li nms_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n - -*@li max_box_number_per_batch: An optional int, specifying the maximum number of output boxes per batch. Defaults to "1024". 
-*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "1024". -* -*@par Outputs: -*@li boxout: An NCHW tensor of type float16, describing the information of each output box, including the coordinates, class, and confidence. -*@li boxoutnum: An NCHW tensor of type int32, specifying the number of output boxes. - -*@attention Constraints:\n -*@li This operator applies only to the YOLO v3 network. -*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators. -*/ -REG_OP(YoloV3DetectionOutput) - .INPUT(coord_data1, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(coord_data2, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(coord_data3, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_data1, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_data2, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_data3, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_data1, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_data2, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_data3, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) - .REQUIRED_ATTR(biases1, ListFloat) - .REQUIRED_ATTR(biases2, ListFloat) - .REQUIRED_ATTR(biases3, ListFloat) - .ATTR(boxes, Int, 3) - .ATTR(coords, Int, 4) - .ATTR(classes, Int, 80) - .ATTR(relative, Bool, true) - .ATTR(obj_threshold, Float, 0.5) - .ATTR(post_top_k, Int, 1024) - .ATTR(classes_threshold, Float, 0.5) - .ATTR(nms_threshold, Float, 0.45) - .ATTR(max_box_number_per_batch, Int, 1024) - .ATTR(pre_nms_topn, Int, 512) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(box_out_num, TensorType({DT_INT32})) - .OP_END_FACTORY_REG(YoloV3DetectionOutput) - -/** -*@brief Performs YOLO V3 detection. - -*@par Inputs: -*16 Input, including: -*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput. \n -A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. -*@li imginfo: A float16, describing the image information including the required image height and width \n -and the actual image height and width. -*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed for the three Yolo outputs, respectively. - -*@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively. - -* -*@par Attributes: -*@li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" -*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. -*@li coords: Specifies the number of coordinate parameters. Must be 4. -*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80]. -*@li relative: An optional bool. Defaults to and must be "true". -*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. -*@li post_top_k: An optional int32. This attribute is reserved. 
-*@li classes_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. -*@li nms_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n -*@li max_box_number_per_batch: An optional int, specifying the maximum number of output boxes per batch. Defaults to "1024". -*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "1024". -* -*@par Outputs: -*@li boxout: An NCHW tensor of type float16, describing the information of each output box, including the coordinates, class, and confidence. -*@li boxoutnum: An NCHW tensor of type int32, specifying the number of output boxes. - -*@attention Constraints:\n -*@li This operator applies only to the YOLO v3 network. -*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators. -*/ -REG_OP(YoloV3DetectionOutputD) - .INPUT(coord_data1, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(coord_data2, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(coord_data3, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_data1, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_data2, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_data3, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_data1, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_data2, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_data3, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(windex1, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(windex2, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(windex3, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(hindex1, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(hindex2, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(hindex3, TensorType({DT_FLOAT16,DT_FLOAT})) - .REQUIRED_ATTR(biases1, ListFloat) - .REQUIRED_ATTR(biases2, ListFloat) - .REQUIRED_ATTR(biases3, ListFloat) - .ATTR(boxes, Int, 3) - .ATTR(coords, Int, 4) - .ATTR(classes, Int, 80) - .ATTR(relative, Bool, true) - .ATTR(obj_threshold, Float, 0.5) - .ATTR(post_top_k, Int, 1024) - .ATTR(classes_threshold, Float, 0.5) - .ATTR(nms_threshold, Float, 0.45) - .ATTR(max_box_number_per_batch, Int, 1024) - .ATTR(pre_nms_topn, Int, 512) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(box_out_num, TensorType({DT_INT32})) - .OP_END_FACTORY_REG(YoloV3DetectionOutputD) - -/** -*@brief Performs object detection. - -*@par Inputs: -*@li cls_prob: An NCHW tensor of type float16 or float32, specifying the probability of the proposal is the background class. -*@li bbox_pred: An NCHW tensor of type float16 or float32, specifying the coordinates of the proposals bounding boxes. - -*@par Attributes: -*@li im_info: A required list of floats, specifying the Image information. The value range is [1, 4096]. -*@li feat_stride: A required float32, specifying the stride of the sliding window. Must be greater than "0". Defaults to "16". -*@li base_size: A required float32, specifying the size of the generated base box. Must be greater than "0". Defaults to "16". -*@li min_size: A required float32, specifying the minimum edge length of a proposal. A box with any edge less than this value is removed. Must be greater than "0". Defaults to "16". -*@li ratio: A required list of floats, specifying the aspect ratio of the generated base box. Defaults to [0.5, 1, 2]. 
-*@li scale: A required list of floats, specifying the ratio of the size of the generated base box to "base_size". Defaults to [8, 16, 32]. -*@li pre_nms_topn: A required int, specifying top K boxes before NMS. For float16 input, pre_nms_topn <= 6000. For float32 input, pre_nms_topn <= 3000. Defaults to "3000". -*@li post_nms_topn: A required int, specifying the number of boxes to be output after NMS. The value is a multiple of 16. For float16 input, post_nms_topn <= 6000. For float32 input, post_nms_topn <= 3000 (the maximum multiple of 16 is 2992 within the range). Defaults to "304". -*@li nms_thresh: A required float32, specifying the NMS threshold. The value range is (0,1]. Defaults to "0.7". - -*@par Outputs: -*@li rois: A Tensor with shape [batch, 5, post_nms_topn], of type float16, specifying the output box information. "post_nms_topn" must be a multiple of 16. The dimension "5" indicates (batchID, x1, y1, x2, y2). The number of BBoxes output per batch is determined by "actual_rois_num". -*@li actual_rois_num: A Tensor with shape [batch, 8], of type int32, specifying the number of BBoxes output per batch. -*/ - REG_OP(Proposal) - .INPUT(cls_prob, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(bbox_pred, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(actual_rois_num, TensorType({DT_INT32})) - .ATTR(im_info, ListFloat, {375, 1240}) - .ATTR(feat_stride, Float, 16) - .ATTR(base_size, Float, 16) - .ATTR(min_size, ListFloat, {16, 16}) - .ATTR(ratio, ListFloat, {0.5, 1, 2}) - .ATTR(scale, ListFloat, {8, 16, 32}) - .ATTR(pre_nms_topn, Int, 6000) - .ATTR(post_nms_topn, Int, 304) - .ATTR(nms_thresh, Float, 0.7) - .OP_END_FACTORY_REG(Proposal) - -/** -*@brief Performs object detection. Different from Proposal, this is an internal API called after FE fusion and has an additional "rpn_bbox" attribute. The suffix "D" in the API name will be removed from the generated model. - -*@par Inputs: -*@li cls_prob: An NCHW tensor of type float16, specifying the probability of the proposal is the background class. -*@li bbox_pred: An NCHW tensor of type float16, specifying the coordinates of the proposals bounding boxes. -*@li rpn_bbox: An NCHW tensor of type float16, specifying the coordinates of the proposals bounding boxes. - -*@par Attributes: -*@li im_info: A required list of floats, specifying the Image information. The value range is [1, 4096]. -*@li feat_stride: A required float32, specifying the stride of the sliding window. Must be greater than "0". Defaults to "16". -*@li base_size: A required float32, specifying the size of the generated base box. Must be greater than "0". Defaults to "16". -*@li min_size: A required float32, specifying the minimum edge length of a proposal. A box with any edge less than this value is removed. Must be greater than "0". Defaults to "16". -*@li ratio: A required list of floats, specifying the aspect ratio of the generated base box. Defaults to [0.5, 1, 2]. -*@li scale: A required list of floats, specifying the ratio of the size of the generated base box to "base_size". Defaults to [8, 16, 32]. -*@li pre_nms_topn: A required int, specifying top K boxes before NMS. For float16 input, pre_nms_topn <= 6000. For float32 input, pre_nms_topn <= 3000. Defaults to "3000". -*@li post_nms_topn: A required int, specifying the number of boxes to be output after NMS. The value is a multiple of 16. For float16 input, post_nms_topn <= 6000. 
For float32 input, post_nms_topn <= 3000 (the maximum multiple of 16 is 2992 within the range). Defaults to "304". -*@li nms_thresh: A required float32, specifying the NMS threshold. The value range is (0,1]. Defaults to 0.7. - -*@par Outputs: -*@li rois: A Tensor with shape [batch, 5, post_nms_topn], of type float16, specifying the output box information. "post_nms_topn" must be a multiple of 16. The dimension "5" indicates (batchID, x1, y1, x2, y2). The number of BBoxes output per batch is determined by "actual_rois_num". -*@li actual_rois_num: A Tensor with shape [batch, 8], of type int32, specifying the number of BBoxes output per batch. -*/ -REG_OP(ProposalD) - .INPUT(cls_prob, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(bbox_pred, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(rpn_bbox, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(actual_rois_num, TensorType({DT_INT32})) - .ATTR(im_info, ListFloat, {375, 1240}) - .ATTR(feat_stride, Float, 16) - .ATTR(base_size, Float, 16) - .ATTR(min_size, ListFloat, {16, 16}) - .ATTR(ratio, ListFloat, {0.5, 1, 2}) - .ATTR(scale, ListFloat, {8, 16, 32}) - .ATTR(pre_nms_topn, Int, 6000) - .ATTR(post_nms_topn, Int, 304) - .ATTR(nms_thresh, Float, 0.7) - .OP_END_FACTORY_REG(ProposalD) - -/** -*@brief Performs YOLO V2 detection. - -*@par Inputs: -* Four inputs, including: -*@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov3DetectionOutput. \n -Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. -*@li imginfo: A float16, describing the image information including the required image height and width \n -and the actual image height and width. -* -*@par Attributes: -*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" -*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. -*@li coords: Specifies the number of coordinate parameters. Must be 4. -*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80]. -*@li relative: An optional bool. Defaults to and must be "true". -*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. - -*@li post_top_k: An optional int32. This attribute is reserved. -*@li classes_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. -*@li nms_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n -*@li max_box_number_per_batch: An optional int, specifying the maximum number of output boxes per batch. Defaults to "1024". -*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "1024". -* -*@par Outputs: -*@li boxout: An NCHW tensor of type float16, describing the information of each output box, including the coordinates, class, and confidence. -*@li boxoutnum: An NCHW tensor of type int32, specifying the number of output boxes. - -*@attention Constraints:\n -*@li This operator applies only to the YOLO v2 network. -*@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator. 
-*/ -REG_OP(YoloV2DetectionOutput) - .INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) - .REQUIRED_ATTR(biases, ListFloat) - .ATTR(boxes, Int, 5) - .ATTR(coords, Int, 4) - .ATTR(classes, Int, 80) - .ATTR(relative, Bool, true) - .ATTR(obj_threshold, Float, 0.5) - .ATTR(post_top_k, Int, 1024) - .ATTR(classes_threshold, Float, 0.5) - .ATTR(nms_threshold, Float, 0.45) - .ATTR(max_box_number_per_batch, Int, 1024) - .ATTR(pre_nms_topn, Int, 512) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(box_out_num, TensorType({DT_INT32})) - .OP_END_FACTORY_REG(YoloV2DetectionOutput) - -/** -*@brief Performs YOLO V2 detection. - -*@par Inputs: -*Six inputs, including: -*@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov2DetectionOutput. \n -Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. -*@li imginfo: A float16, describing the image information including the required image height and width \n -and the actual image height and width. -*@li windex: A windex tensor with shape [height, weight]. Has the same type as the inputs. [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed. \n - -*@li hindex: A hindex tensor with shape [height, weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]]. \n - -* -*@par Attributes: -*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" -*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. -*@li coords: Specifies the number of coordinate parameters. Must be 4. -*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80]. -*@li relative: An optional bool. Defaults to and must be "true". -*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. -*@li post_top_k: An optional int32. This attribute is reserved. -*@li classes_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. - -*@li nms_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n -*@li max_box_number_per_batch: An optional int, specifying the maximum number of output boxes per batch. Defaults to "1024". -*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "1024". -* -*@par Outputs: -*@li boxout: An NCHW tensor of type float16, describing the information of each output box, including the coordinates, class, and confidence. -*@li boxoutnum: An NCHW tensor of type int32, specifying the number of output boxes. -* -*@attention Constraints:\n -*@li This operator applies only to the YOLO v2 network. -*@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator. 
-*/ -REG_OP(YoloV2DetectionOutputD) - .INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(obj_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(classes_data, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT})) - .REQUIRED_ATTR(biases, ListFloat) - .ATTR(boxes, Int, 5) - .ATTR(coords, Int, 4) - .ATTR(classes, Int, 80) - .ATTR(relative, Bool, true) - .ATTR(obj_threshold, Float, 0.5) - .ATTR(post_top_k, Int, 1024) - .ATTR(classes_threshold, Float, 0.5) - .ATTR(nms_threshold, Float, 0.45) - .ATTR(max_box_number_per_batch, Int, 1024) - .ATTR(pre_nms_topn, Int, 512) - .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(box_out_num, TensorType({DT_INT32})) - .OP_END_FACTORY_REG(YoloV2DetectionOutputD) - -/** -*@brief Performs plane or channel conversion on YoloV2. -* If reverse=true: (N, H, W, C)->(N, H*stride, W*stride, C/(stride*stride)) -* If reverse=false: (N, H, W, C)->(N, H/stride, W/stride, C*(stride*stride)) - -*@par Inputs: -*x: An (N, H, W, C) tensor. All data types are supported. - -*@par Attributes: -*@li stride: An optional int32, specifying the plane or channel scaling factor. Defaults to "2". -*@li reverse: An optional bool, specifying the conversion mode. If "true", depth to space conversion is performed. If "false", space to depth conversion is performed. Defaults to "false". - -*@par Outputs: -*y: An (N, H, W, C) tensor. All data types are supported. - -*@attention Constraints: -*@li If reverse=true: C/(stride*stride) yields an integer result. If reverse=false: W/stride and H/stride yield integer results. -*/ -REG_OP(PassThrough) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64})) - .ATTR(stride, Int, 2) - .ATTR(reverse, Bool, false) - .OP_END_FACTORY_REG(PassThrough) - -/** *@brief Crops the input. *@par Inputs: @@ -1700,36 +1308,5 @@ REG_OP(Crop) .ATTR(axis, Int, 2) .REQUIRED_ATTR(offsets, ListInt) .OP_END_FACTORY_REG(Crop) - -/** -*@brief Extends the input with copies of data along a specified dimension. For example: \n -(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2);\n -(2) axis = 1;\n -(3) tiles = 2;\n -(4) Then, y = [[[1, 2], [3, 4], [5, 6], [1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12], [7, 8], [9, 10], [11, 12]]], with shape (2, 6, 2). - -*@par Inputs: -* One input: -*input_x: A Tensor with any format. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. - -*@par Attributes: -*@li axis: An optional int32, specifying the axis to tile. Defaults to 1. -*@li tiles: A required int32, specifying the number of copies (tiles) to output. - -*@par Outputs: -*output_y: A Tensor of any format. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. - -*@attention Constraints:\n -*@li "axis" must be within the rank of the input tensor. -*@li "tiles" must be greater than 1. 
-*/ -REG_OP(TileV2) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT64, DT_INT32, - DT_INT16, DT_INT8, DT_UINT64, DT_UINT32, DT_UINT16, DT_UINT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT64, DT_INT32, - DT_INT16, DT_INT8, DT_UINT64, DT_UINT32, DT_UINT16, DT_UINT8})) - .ATTR(axis, Int, 1) - .REQUIRED_ATTR(tiles, Int) - .OP_END_FACTORY_REG(TileV2) } // namespace ge #endif // GE_OP_SELECTION_OPS_H diff --git a/third_party/fwkacllib/inc/ops/set_ops.h b/third_party/fwkacllib/inc/ops/set_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/shuffle_channel_ops.h b/third_party/fwkacllib/inc/ops/shuffle_channel_ops.h deleted file mode 100755 index b9f609fc..00000000 --- a/third_party/fwkacllib/inc/ops/shuffle_channel_ops.h +++ /dev/null @@ -1,31 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - #ifndef GE_OP_SHUFFLE_CHANNEL_OPS_H - #define GE_OP_SHUFFLE_CHANNEL_OPS_H - - #include "graph/operator_reg.h" - - namespace ge { - - REG_OP(ShuffleChannel) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) - .ATTR(group, Int, 1) - .OP_END_FACTORY_REG(ShuffleChannel) - } // namespace ge - - #endif // GE_OP_SHUFFLE_CHANNEL_OPS_H diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/state_ops.h b/third_party/fwkacllib/inc/ops/state_ops.h old mode 100755 new mode 100644 diff --git a/third_party/fwkacllib/inc/ops/stateless_random_ops.h b/third_party/fwkacllib/inc/ops/stateless_random_ops.h index 5827ba9b..c793574b 100644 --- a/third_party/fwkacllib/inc/ops/stateless_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateless_random_ops.h @@ -32,4 +32,4 @@ REG_OP(StatelessMultinomial) } // namespace ge -#endif //GE_OP_STATELESS_RANDOM_OPS_H +#endif //GE_OP_STATELESS_RANDOM_OPS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/threshold_ops.h b/third_party/fwkacllib/inc/ops/threshold_ops.h deleted file mode 100755 index f692ad3c..00000000 --- a/third_party/fwkacllib/inc/ops/threshold_ops.h +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - #ifndef GE_OP_THRESHOLD_H - #define GE_OP_THRESHOLD_H - - #include "graph/operator_reg.h" - - namespace ge { - - REG_OP(Threshold) - .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT32})) - .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT32})) - .ATTR(threshold, Float, 0.0) - // .INFER_SHAPE_AND_TYPE(ThresholdInferShape) - .OP_END_FACTORY_REG(Threshold); - - } // namespace ge - - #endif // GE_OP_THRESHOLD_OPS_H diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 3a421a72..a821656a 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -111,6 +111,22 @@ REG_OP(SpaceToDepth) .ATTR(data_format, String, "NHWC") .OP_END_FACTORY_REG(SpaceToDepth) +/** +*@brief Rearranges data from depth into blocks of spatial data. + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float32, double, int32, uint8, +* int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, uint32, uint64 + +*@par Attributes: +*Two attributes: +* @li block_size: An int >= 2, specifying the size of the spatial block. +* @li data_format: An optional string, specifying the data format. Defaults to "NHWC". + +*@par Outputs: +*y: A Tensor of the same type as "x". +*/ REG_OP(DepthToSpace) .INPUT(x, TensorType::BasicType()) .OUTPUT(y, TensorType::BasicType()) @@ -230,15 +246,18 @@ REG_OP(Unpack) * images: A 4D Tensor with shape [batch, in_rows, in_cols, depth]. * @par Attributes: -* @li ksizes: The size of the sliding window for each dimension of images. -* @li strides: How far the centers of two consecutive patches are in the images.\n +* @li ksizes: An optional tuple or list. The size of the sliding window for +* each dimension of images. +* @li strides: An optional tuple or list. How far the centers of two +* consecutive patches are in the images.\n * Must be: [1, stride_rows, stride_cols, 1]. -* @li rates: Must be: [1, rate_rows, rate_cols, 1]. This is the input stride,\n +* @li rates: An optional tuple or list. Must be: [1, rate_rows, rate_cols, 1]. +* This is the input stride,\n * specifying how far two consecutive patch samples are in the input. Equivalent\n * to extracting patches with patch_sizes_eff = patch_sizes + (patch_sizes - 1) *\n * (rates - 1), followed by subsampling them spatially by a factor of rates. This\n * is equivalent to rate in dilated (a.k.a. Atrous) convolutions. -* @li padding: The type of padding algorithm to use. +* @li padding: An optional string. The type of padding algorithm to use. * @par Outputs: * Output: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows *\n @@ -258,6 +277,20 @@ REG_OP(ExtractImagePatches) .ATTR(padding, String, "SAME") .OP_END_FACTORY_REG(ExtractImagePatches) +/** +*@brief Fuses reshape and transpose into one operator. + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64.
+ +*@par Attributes: +*@li perm: A permutation of the dimensions of "x". +*@li shape: The shape of the input. +*@li transpose_first: If "true", the transpose is performed first; otherwise, the reshape is performed first. + +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*/ REG_OP(ConfusionTransposeD) .INPUT(x, TensorType::BasicType()) .OUTPUT(y, TensorType::BasicType()) @@ -266,6 +299,20 @@ REG_OP(ConfusionTransposeD) .REQUIRED_ATTR(transpose_first, Bool) .OP_END_FACTORY_REG(ConfusionTransposeD) +/** +*@brief Fuses reshape and transpose into one operator. + +*@par Inputs: +*@li x: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. +*@li shape: The shape of the input. + +*@par Attributes: +*@li perm: A permutation of the dimensions of "x". +*@li transpose_first: If "true", the transpose is performed first; otherwise, the reshape is performed first. + +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*/ REG_OP(ConfusionTranspose) .INPUT(x, TensorType::BasicType()) .INPUT(shape, TensorType::IndexNumberType()) @@ -274,14 +321,6 @@ REG_OP(ConfusionTranspose) .REQUIRED_ATTR(transpose_first, Bool) .OP_END_FACTORY_REG(ConfusionTranspose) -REG_OP(FlattenV2) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, - DT_INT32, DT_UINT32, DT_INT64, DT_UINT64})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, - DT_INT32, DT_UINT32, DT_INT64, DT_UINT64})) - .ATTR(axis, Int, 1) - .ATTR(end_axis, Int, -1) - .OP_END_FACTORY_REG(FlattenV2) } // namespace ge #endif // GE_OP_TRANSFORMATION_OPS_H diff --git a/third_party/fwkacllib/inc/register/op_registry.h b/third_party/fwkacllib/inc/register/op_registry.h index 3bfa8f88..74473868 100644 --- a/third_party/fwkacllib/inc/register/op_registry.h +++ b/third_party/fwkacllib/inc/register/op_registry.h @@ -21,9 +21,27 @@ #include #include #include + #include "register/register.h" namespace domi { +enum OmgMoveTypeToAttr { + OMG_MOVE_TYPE_DTYPE = 0, + OMG_MOVE_TYPE_VALUE, + OMG_MOVE_TYPE_SHAPE, + OMG_MOVE_TYPE_FORMAT, + OMG_MOVE_TYPE_AXIS, + OMG_MOVE_TYPE_SCALAR_VALUE, + OMG_REMOVE_TYPE_WITH_COND = 1000, +}; + +struct MoveInputToAttrStu { + int inputIdx; + std::string attrName; + OmgMoveTypeToAttr moveType; + bool attrValue; +}; + class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { public: static OpRegistry *Instance(); @@ -36,75 +54,18 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { void GetOpTypeByImplyType(std::vector &vec_op_type, const domi::ImplyType &imply_type); - void GetFormats(const std::string &op_type, std::vector &input_format_vector, - std::vector &output_format_vector); - - void GetWeightFormats(const std::string &op_type, std::vector &format_vector); - domi::ParseParamFunc GetParseParamFunc(const std::string &op_type); - domi::InferShapeFunc GetInferShapeFunc(const std::string &op_type); - - domi::InferShapeFuncV2 GetInferShapeFuncV2(const std::string &op_type); - - domi::GetWorkspaceSizeFunc GetGetWorkspaceSizeFunc(const std::string &op_type); - - domi::UpdateOpDescFunc GetUpdateOpDescFunc(const std::string &op_type); - - domi::BuildTeBinFunc GetBuildTeBinFunc(const std::string &op_type); - domi::ImplyType GetImplyTypeByOriOpType(const std::string &ori_optype); - void GetSupportedInputFormats(const std::string &opType, std::vector> &suportedInputFormats); - - void GetSupportedOutputFormats(const std::string &opType, - std::vector> &supportedOutputFormats); - - void GetSupportedInputTypes(const std::string &opType, - std::vector> &suportedInputDataTypes); - void GetSupportedInputTypesByOriginOpType(const std::string &opType, - std::vector> &suportedInputDataTypes); - - void GetSupportedOutputTypes(const std::string &opType, - std::vector> &supportedOutputDataTypes); - void GetSupportedOutputTypesByOriginOpType(const std::string &opType, - std::vector> &supportedOutputDataTypes); - - void GetLimitedInputTensorDescs(const std::string &opType, - std::vector> &inputLimitedTensorDescs); - void GetLimitedInputTensorDescsByOriginOpType(const std::string &opType, - std::vector> &inputLimitedTensorDescs); - - void GetLimitedOutputTensorDescs(const std::string &opType, - std::vector> &outputLimitedTensorDescs); - void GetLimitedOutputTensorDescsByOriginOpType( - const std::string &opType, std::vector> &outputLimitedTensorDescs); - const std::vector &GetConstInputToAttr(const std::string &ori_optype) const; private: std::unordered_map> op_ori_optype_map_; std::unordered_map op_run_mode_map_; - std::unordered_map> op_input_formats_map_; - std::unordered_map> op_output_formats_map_; - std::unordered_map> op_weight_formats_map_; std::unordered_map opParseParamsFnMap_; - std::unordered_map opInferShapeFnMap_; - std::unordered_map opInferShapeFnMapV2_; - std::unordered_map opGetWorkspaceSizeFnMap_; - std::unordered_map opUpdateOpDescFnMap_; - std::unordered_map opBuildTeBinFnMap_; std::unordered_map> opConstInputToAttrMap_; - - std::unordered_map>> opInputSupportedFormats_; - std::unordered_map>> opOutputSupportedFormats_; - std::unordered_map>> opInputSupportedDataTypes_; - std::unordered_map>> opOutputSupportedDataTypes_; - std::unordered_map>> opInputLimitedTensorDescs_; - std::unordered_map>> opOutputLimitedTensorDescs_; - std::unordered_map originOpType2OmOpType_; }; } // namespace domi - #endif // INC_REGISTER_OP_REGISTRY_H_
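The new OmgMoveTypeToAttr / MoveInputToAttrStu types added to op_registry.h above describe how a converter can promote an operator input into an attribute. A minimal illustrative sketch of how such rules could be expressed with these types; the rule entries themselves are invented for demonstration, not taken from this patch:

#include <string>
#include <vector>
#include "register/op_registry.h"

// Hypothetical rule table built on the types this patch introduces.
// Field order follows the struct: inputIdx, attrName, moveType, attrValue.
static const std::vector<domi::MoveInputToAttrStu> kExampleMoveRules = {
    {1, "axis", domi::OMG_MOVE_TYPE_AXIS, false},    // move input 1 into attribute "axis"
    {2, "shape", domi::OMG_MOVE_TYPE_SHAPE, false},  // move input 2 into attribute "shape"
};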
diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index eaf44a41..724141cc 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -21,7 +21,7 @@ #ifdef __cplusplus extern "C" { -#endif +#endif // __cplusplus // If you need export the function of this library in Win32 dll, use __declspec(dllexport) #ifndef RTS_API @@ -29,8 +29,8 @@ extern "C" { #define RTS_API __declspec(dllexport) #else #define RTS_API -#endif -#endif +#endif // RTS_DLL_EXPORT +#endif // RTS_API /** * @ingroup dvrt_base @@ -65,7 +65,8 @@ typedef enum tagRtError { RT_ERROR_MODEL_STREAM_EXE_FAILED = 0x91, // the model stream failed RT_ERROR_MODEL_LOAD_FAILED = 0x94, // the model stream failed RT_ERROR_END_OF_SEQUENCE = 0x95, // end of sequence - + RT_ERROR_NO_STREAM_CB_REG = 0x96, // no callback register info for stream + RT_ERROR_DATA_DUMP_LOAD_FAILED = 0x97, // data dump load info fail RT_ERROR_RESERVED } rtError_t; diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index 131543c0..c7301a99 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -21,7 +21,7 @@ #ifdef __cplusplus extern "C" { -#endif +#endif // __cplusplus #define PLAT_COMBINE(arch, chip, ver) ((arch << 16) | (chip << 8) | (ver)) #define PLAT_GET_ARCH(type) ((type >> 16) & 0xffff) @@ -114,9 +114,7 @@ typedef struct tagRtMemoryConfig { uint32_t compilerSize; } rtMemoryConfig_t; -typedef struct tagRtPlatformConfig { - uint32_t platformConfig; -} rtPlatformConfig_t; +typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t; /** * @ingroup
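The PLAT_COMBINE / PLAT_GET_ARCH macros kept in the config.h hunk above pack an architecture, chip, and version number into one 32-bit platform ID. A small self-contained sketch; the macro bodies are copied from the hunk, while the arch/chip/version values are invented for illustration:

#include <cstdint>
#include <cstdio>

// Copied from runtime/config.h as shown in the hunk above.
#define PLAT_COMBINE(arch, chip, ver) ((arch << 16) | (chip << 8) | (ver))
#define PLAT_GET_ARCH(type) ((type >> 16) & 0xffff)

int main() {
  uint32_t platform = PLAT_COMBINE(2u, 1u, 0u);  // invented example values
  std::printf("platform=0x%06x arch=%u\n", platform, PLAT_GET_ARCH(platform));
  return 0;
}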
diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index b91b1173..016abec1 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -21,7 +21,7 @@ #ifdef __cplusplus extern "C" { -#endif +#endif // __cplusplus /** * @ingroup rt_context diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index 47ff2629..b171ff73 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -21,7 +21,7 @@ #ifdef __cplusplus extern "C" { -#endif +#endif // __cplusplus typedef struct tagRTDeviceInfo { uint8_t env_type; /* 0: FPGA 1: EMU 2: ESL */ diff --git a/third_party/fwkacllib/inc/runtime/dvfsprofile.h b/third_party/fwkacllib/inc/runtime/dvfsprofile.h index 11081546..c896a31e 100644 --- a/third_party/fwkacllib/inc/runtime/dvfsprofile.h +++ b/third_party/fwkacllib/inc/runtime/dvfsprofile.h @@ -21,7 +21,7 @@ #ifdef __cplusplus extern "C" { -#endif +#endif // __cplusplus typedef enum dvfsProfileMode { DVFS_PROFILE_PERFORMANCE_PRIORITY, diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index 08fb98ec..52b04f7f 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -21,7 +21,7 @@ #ifdef __cplusplus extern "C" { -#endif +#endif // __cplusplus /** * @ingroup dvrt_event diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index 8dd0e0e3..ed076a8b 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -22,9 +22,9 @@ #ifdef __cplusplus extern "C" { -#endif - +#endif // __cplusplus +/*lint -e148*/ /** * @ingroup rt_kernel * @brief shared memory data control @@ -41,7 +41,7 @@ typedef struct tagRtSmData { uint8_t reserved[2]; // reserved } rtSmData_t; - +/*lint -e148*/ /** * @ingroup rt_kernel * @brief shared memory description diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index c1ec7776..65c6dc61 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -17,14 +17,16 @@ #ifndef __CCE_RUNTIME_MEM_H__ #define __CCE_RUNTIME_MEM_H__ +/*lint -e7*/ #include +/*lint +e7*/ #include "base.h" #include "config.h" #include "stream.h" #ifdef __cplusplus extern "C" { -#endif +#endif // __cplusplus /** * @ingroup dvrt_mem diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index 4e875107..8fe94424 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -21,7 +21,7 @@ #ifdef __cplusplus extern "C" { -#endif +#endif // __cplusplus typedef enum tagModelTaskType { RT_MODEL_TASK_KERNEL = 0, @@ -92,6 +92,7 @@ typedef struct tagAicpuModelInfo { uint64_t aicpuTaskPtr; } rtAicpuModelInfo_t; +/* lint -e148 */ typedef struct tagKernelTaskInfo { uint16_t blockDim; uint16_t argsCount; @@ -101,7 +102,7 @@ typedef struct tagKernelTaskInfo { uint8_t *smDesc; uint8_t *args; uint16_t *argsOffset; -} rtKernelTaskInfo_t; +} rtKernelTaskInfo_t; /* lint +e148 */ typedef struct tagKernelTaskInfoEx { uint32_t flags; diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index 83bb4b63..0b5ce843 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -22,7 +22,7 @@ #ifdef 
__cplusplus extern "C" { -#endif +#endif // __cplusplus /** * @ingroup stream_flags diff --git a/third_party/fwkacllib/inc/tdt/data_common.h b/third_party/fwkacllib/inc/tdt/data_common.h index 6ea9362f..81f79346 100644 --- a/third_party/fwkacllib/inc/tdt/data_common.h +++ b/third_party/fwkacllib/inc/tdt/data_common.h @@ -72,4 +72,4 @@ struct DataItem { std::shared_ptr dataPtr_; /**< Data pointer*/ }; } // namespace tdt -#endif +#endif // HOST_INNER_INC_DATA_COMMON_H_ diff --git a/third_party/fwkacllib/inc/tdt/status.h b/third_party/fwkacllib/inc/tdt/status.h index 1be204e6..50a656c9 100644 --- a/third_party/fwkacllib/inc/tdt/status.h +++ b/third_party/fwkacllib/inc/tdt/status.h @@ -331,23 +331,24 @@ enum { #define TDT_GET_ERROR_STR(CODE_NAME) (tdt::StatusFactory::GetInstance()->GetErrDesc(CODE_NAME)) #endif -constexpr uint16_t MODID_TDT_CLIENT = 0x0101; -constexpr uint16_t MODID_TSD_SERVER = 0x0102; // TSD_SERVER -constexpr uint16_t MODID_HDC = 0x0103; // HDC_SERVER -constexpr uint16_t MODID_TDT_SHUFFLE = 0x0104; -constexpr uint16_t MODID_TDT_PREFETCH = 0x0105; -constexpr uint16_t MODID_TDT_TRANSFER = 0x0106; -constexpr uint16_t MODID_TDT_SUPERVISOR = 0x0107; -constexpr uint16_t MODID_MEM_POOL = 0x0108; // MEMORY_POOL -constexpr uint16_t MODID_PPC = 0x0109; // TDT PPC -constexpr uint16_t MODID_TDT_FILE = 0x0110; -constexpr uint16_t MODID_HDC_SERVER = 0x0111; -constexpr uint16_t MODID_TDT_SERVER = 0x0112; -constexpr uint16_t MODID_HDC_CLIENT = 0x0113; -constexpr uint16_t MODID_TSD_CLIENT = 0x0114; -constexpr uint16_t MODID_CHECKSUM = 0x0115; -constexpr uint16_t MODID_TDT_MONITOR = 0x0116; -constexpr uint16_t MODID_TDT_HOST = 0x0117; +// Register module id: 0xAABB, AA means system level number, BB means module level number +constexpr uint16_t MODID_TDT_CLIENT = 0x0101; // TDT_CLIENT module ID +constexpr uint16_t MODID_TSD_SERVER = 0x0102; // TSD_SERVER +constexpr uint16_t MODID_HDC = 0x0103; // HDC_SERVER +constexpr uint16_t MODID_TDT_SHUFFLE = 0x0104; // TDT shuffle module ID +constexpr uint16_t MODID_TDT_PREFETCH = 0x0105; // TDT prefetch module ID +constexpr uint16_t MODID_TDT_TRANSFER = 0x0106; // TDT TrainDataTransfer module ID +constexpr uint16_t MODID_TDT_SUPERVISOR = 0x0107; // TDT supervisor module ID +constexpr uint16_t MODID_MEM_POOL = 0x0108; // MEMORY_POOL +constexpr uint16_t MODID_PPC = 0x0109; // TDT PPC +constexpr uint16_t MODID_TDT_FILE = 0x0110; // TDT file operation module ID +constexpr uint16_t MODID_HDC_SERVER = 0x0111; // HDC_SERVER module ID +constexpr uint16_t MODID_TDT_SERVER = 0x0112; // TDTServer module ID +constexpr uint16_t MODID_HDC_CLIENT = 0x0113; // HDC_CLIENT module ID +constexpr uint16_t MODID_TSD_CLIENT = 0x0114; // TSD_CLIENT module ID +constexpr uint16_t MODID_CHECKSUM = 0x0115; // Checksum module ID +constexpr uint16_t MODID_TDT_MONITOR = 0x0116; // TDT monitor module ID +constexpr uint16_t MODID_TDT_HOST = 0x0117; // GE adapts the TDT HOST module ID constexpr uint32_t TDT_API_MAX_SUB_VERSION = 100; static const int32_t TDT_INVAILED_DEVICE_ID = 0xFFFFFFFF; @@ -362,17 +363,50 @@ typedef enum tdt_api_version { namespace tdt { class StatusFactory { public: + /** + * @ingroup hiaiengine + * @brief Get a pointer to StatusFactory + * @param [in]: + * @return StatusFactory pointer + */ TDT_LIB_EXPORT static StatusFactory *GetInstance(); + /** + * @ingroup hiaiengine + * @brief Register an error code + * @param [in]err error code + * @param [in]desc Description string of the error code + */ TDT_LIB_EXPORT void RegisterErrorNo(const uint32_t err, const std::string
&desc); + /** + * @ingroup hiaiengine + * @brief Get error code description string + * @param [in]err error code + */ std::string GetErrDesc(const uint32_t err); + /** + * @ingroup hiaiengine + * @brief Static function: Get error code description string + * @param [in]err error code + * @return If there is a problem, return the empty string "" + */ static std::string GetErrCodeDesc(uint32_t errCode); protected: + /** + * @ingroup hiaiengine + * @brief Constructor + * @param [in] void + */ StatusFactory(); + /** + * @ingroup hiaiengine + * @brief Destructor + * @param [in] void + */ ~StatusFactory() {} StatusFactory(const StatusFactory &) = delete; @@ -389,6 +423,12 @@ class StatusFactory { class ErrorNoRegisterar { public: + /** + * @ingroup hiaiengine + * @brief Register an error code + * @param [in]err error code + * @param [in]desc Description of the registered error code + */ ErrorNoRegisterar(const uint32_t &err, const std::string &desc) { StatusFactory::GetInstance()->RegisterErrorNo(err, desc); } @@ -452,6 +492,7 @@ TDT_DEF_ERROR_CODE(MODID_HDC_SERVER, TDT_ERROR, TDT_HDC_SERVER_CLIENT_SOCKET_CLO "hdc service or client socket closed"); /*********************TSDAEMON************************/ +// TSDAEMON error-level error codes TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_START_FAIL, "Tsdaemon start fail"); TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEANPROC_FIRST_GETPID_FAILED, "Tsdaemon first get pid fail"); TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEANPROC_KILL_PROCESS_FAILED, "Tsdaemon kill processfail"); @@ -464,6 +505,7 @@ TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEAN_RESOURCE_FAILED, " TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_SEND_MSG_FAILED, "Tsdaemon send msg fail"); /********************* PPC ****************************/ +// PPC error-level error codes TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_DRIVER_INIT_FAIL, "Init PPC driver fail"); TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SERVER_CLIENT_CREATE_FAIL, "Create PPC server or PPC client fail"); TDT_DEF_ERROR_CODE(MODID_PPC, TDT_ERROR, TDT_PPC_SERVER_CLIENT_DESTORY_FAIL, "Destory PPC server or PPC client fail");
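The error-code plumbing in this status.h hunk is self-registering: the ErrorNoRegisterar constructor shown above stores a code/description pair in the StatusFactory singleton, and TDT_GET_ERROR_STR reads the description back through GetErrDesc. A minimal usage sketch under stated assumptions; the code value and description below are invented, and registering a code by hand like this is presumably what TDT_DEF_ERROR_CODE expands to, which this patch does not show:

#include <iostream>
#include "tdt/status.h"

namespace {
// Invented example code; real codes are declared with TDT_DEF_ERROR_CODE.
const tdt::ErrorNoRegisterar g_demoErr(0x01170001u, "demo: host adapter failure");
}  // namespace

int main() {
  // Prints the description registered above via the singleton.
  std::cout << tdt::StatusFactory::GetInstance()->GetErrDesc(0x01170001u) << std::endl;
  return 0;
}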
diff --git a/third_party/fwkacllib/inc/tdt/tdt_host_interface.h b/third_party/fwkacllib/inc/tdt/tdt_host_interface.h index 6539735d..821ee819 100644 --- a/third_party/fwkacllib/inc/tdt/tdt_host_interface.h +++ b/third_party/fwkacllib/inc/tdt/tdt_host_interface.h @@ -27,17 +27,98 @@ extern "C" { #endif // __cplusplus namespace tdt { +/** +* @ingroup TdtHostInit +* @brief Initialize the interface; start and initialize various general threads, logging, and other services +* +* @par Function +* Initialize the interface; start and initialize various general threads, logging, and other services +* +* @param deviceId [IN] type #unsigned int. Physical device ID +* @retval #0 Success +* @retval #Not 0 Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ int32_t TdtHostInit(uint32_t deviceId); +/** +* @ingroup TdtHostPushData +* @brief Blocking queue. When the queue is full, the Push interface will block. +* +* @par Function +* Blocking queue. When the queue is full, the Push interface will block. +* +* @param channelName [IN] type #String. queue channel name +* @param items [IN] type #vector. DataItem is defined in data_common.h. Input data +* @retval 0 Success +* @retval OtherValues Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +* @li data_common.h: Header file where 'DataItem' is defined +*/ int32_t TdtHostPushData(const std::string &channelName, const std::vector &item); +/** +* @ingroup TdtHostDestroy +* @brief Notify the TDT component to close related resources +* +* @par Function +* Notify the TDT component to close related resources +* +* @param NA +* @retval 0 Success +* @retval OtherValues Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ int32_t TdtHostDestroy(); +/** +* @ingroup TdtHostPopData +* @brief Pop data from the TDT data storage queue +* +* @par Function +* Pop data from the TDT data storage queue +* +* @param channelName [IN] type #String. queue channel name +* @param items [OUT] type #vector. DataItem is defined in data_common.h. Popped data +* @retval 0 Success +* @retval OtherValues Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +* @li data_common.h: Header file where 'DataItem' is defined +*/ int32_t TdtHostPopData(const std::string &channelName, std::vector &item); +/** +* @ingroup TdtHostStop +* @brief Activate the thread that reads data externally from TDT and +* send end-of-sequence data so that the external thread can exit +* +* @par Function +* Activate the thread that reads data externally from TDT and send +* end-of-sequence data so that the external thread can exit +* +* @param channelName [IN] type #String. queue channel name +* @retval 0 Success +* @retval OtherValues Fail +* +* @par Dependency +* @li libtsdclient.so: Library to which the interface belongs. +* @li tdt_host_interface.h: Header file where the interface declaration is located. +*/ int32_t TdtHostStop(const std::string &channelName); } // namespace tdt #ifdef __cplusplus } #endif // __cplusplus -#endif +#endif // HOST_INNER_INC_TDT_HOST_INTERFACE_H_
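Taken together, the newly documented declarations above describe the host-side channel lifecycle: initialize, push or pop data, send end-of-sequence, destroy. A minimal sketch under stated assumptions; the device ID and channel name are invented, and the element type of the data vector is DataItem as the doc comments state (the template arguments are elided throughout this patch listing):

#include <string>
#include <vector>
#include "tdt/data_common.h"
#include "tdt/tdt_host_interface.h"

int main() {
  // Illustrative values only; not taken from this patch.
  const uint32_t deviceId = 0;
  const std::string channel = "train_channel";

  if (tdt::TdtHostInit(deviceId) != 0) return -1;  // start TDT host services

  std::vector<tdt::DataItem> items;                // element type per the doc comments
  tdt::TdtHostPushData(channel, items);            // blocks when the queue is full

  tdt::TdtHostStop(channel);                       // send end-of-sequence so readers exit
  return tdt::TdtHostDestroy();                    // release TDT resources
}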
diff --git a/third_party/fwkacllib/inc/tdt/tsd_client.h b/third_party/fwkacllib/inc/tdt/tsd_client.h index 2a15cda7..a50f9c6b 100644 --- a/third_party/fwkacllib/inc/tdt/tsd_client.h +++ b/third_party/fwkacllib/inc/tdt/tsd_client.h @@ -28,14 +28,69 @@ extern "C" { #endif // __cplusplus namespace tdt { +/** +* @ingroup RANK_SIZE_DEFAULT_VALUE. +* The default value of rank size is 1. +* With the default value, HCCP is not pulled up to perform collective-communication operations. +* +*/ constexpr uint32_t RANK_SIZE_DEFAULT_VALUE = 1; class TsdClient { public: + /** + * @ingroup GetInstance + * @brief Get the TsdClient instance + * + * @par Function + * Get the TsdClient instance + * + * @param NA + * @retval TsdClient TsdClient instance + * + * @par Dependency + * @li libtsdclient.so: Library to which the interface belongs. + * @li tsd_client.h: Header file where the interface declaration is located. + */ static TsdClient *GetInstance(); + /** + * @ingroup ~TsdClient + * @brief TsdClient destructor + * + * @par Function + * TsdClient destructor + * + * @param NA + * @retval NA + * + * @par Dependency + * @li libtsdclient.so: Library to which the interface belongs. + * @li tsd_client.h: Header file where the interface declaration is located. + */ ~TsdClient(); + /** + * @ingroup Open + * @brief Used by the Framework process to communicate with the TSDDaemon process, + * and notify TSD to complete the initialization of other processes + * + * @par Function + * Used by the Framework process to communicate with the TSDDaemon process, + * and notify TSD to complete the initialization of other processes + * + * @param phyDeviceId [IN] type #unsigned int. Physical device ID + * @param rankSize [IN] type #unsigned int. The rank size of the training. + * The default value is 1. When rankSize is greater than 1, + * HCCP will be pulled up to perform collective-communication operations. + * @retval TDT_OK Success + * @retval OtherValues Failure + * + * @par Dependency + * @li libtsdclient.so: Library to which the interface belongs. + * @li tsd_client.h: Header file where the interface declaration is located. + * @li data_common.h: Header file where 'TDT_StatusT' is defined + */ TDT_StatusT Open(const uint32_t phyDeviceId, const uint32_t rankSize = RANK_SIZE_DEFAULT_VALUE); TDT_StatusT Close(); @@ -46,10 +101,11 @@ class TsdClient { TsdClient(TsdClient &&) = delete; TsdClient &operator=(const TsdClient &) = delete; TsdClient &operator=(TsdClient &&) = delete; + uint32_t rankSize_; }; } // namespace tdt #ifdef __cplusplus } #endif // __cplusplus -#endif // TDT_HOST_INNER_INC_TSD_CLIENT_H_ +#endif // TDT_HOST_INNER_INC_TSD_CLIENT_H_ diff --git a/third_party/fwkacllib/inc/toolchain/prof_engine.h b/third_party/fwkacllib/inc/toolchain/prof_engine.h index 61a2a437..0e757dcf 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_engine.h +++ b/third_party/fwkacllib/inc/toolchain/prof_engine.h @@ -14,22 +14,21 @@ * limitations under the License. */ -#ifndef _MSPROF_ENGINE_PROF_ENGINE_H_ -#define _MSPROF_ENGINE_PROF_ENGINE_H_ +#ifndef MSPROF_ENGINE_PROF_ENGINE_H_ +#define MSPROF_ENGINE_PROF_ENGINE_H_ #define MSVP_PROF_API __attribute__((visibility("default"))) #include #include #include "prof_reporter.h" -namespace Msprof { -namespace Engine { - /** + * @file prof_engine.h * @defgroup ModuleJobConfig the ModuleJobConfig group * This is the ModuleJobConfig group */ - +namespace Msprof { +namespace Engine { /** * @ingroup ModuleJobConfig * @brief struct ModuleJobConfig @@ -205,11 +204,4 @@ MSVP_PROF_API int UnInit(const std::string &module); } // namespace Engine } // namespace Msprof -#endif -/* - * History: \n - * 2019-04-10, huawei, Create file. \n - * 2020-02-10, huawei, Add Api Comment. \n - * - * vi: set expandtab ts=4 sw=4 tw=120: - */ \ No newline at end of file +#endif // MSPROF_ENGINE_PROF_ENGINE_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/toolchain/prof_mgr_core.h b/third_party/fwkacllib/inc/toolchain/prof_mgr_core.h index 5dbc15a7..4f013eef 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_mgr_core.h +++ b/third_party/fwkacllib/inc/toolchain/prof_mgr_core.h @@ -14,14 +14,15 @@ * limitations under the License. */ -#ifndef _MSPROF_ENGINE_PROF_MGR_CORE_H_ -#define _MSPROF_ENGINE_PROF_MGR_CORE_H_ +#ifndef MSPROF_ENGINE_PROF_MGR_CORE_H_ +#define MSPROF_ENGINE_PROF_MGR_CORE_H_ #define MSVP_PROF_API __attribute__((visibility("default"))) #include #include /** + * @file prof_mgr_core.h * @brief : struct ProfMgrCfg */ struct ProfMgrCfg { @@ -79,11 +80,5 @@ MSVP_PROF_API int ProfMgrStop(void *handle); * @see ProfMgrStartUp */ MSVP_PROF_API int ProfMgrGetConf(const std::string &aicoreMetricsType, ProfMgrConf *conf); -#endif -/* - * History: \n - * 2019-04-10, huawei, Create file. \n - * 2020-02-10, huawei, Add Api Comment.
\n - * - * vi: set expandtab ts=4 sw=4 tw=120: - */ \ No newline at end of file + +#endif // MSPROF_ENGINE_PROF_MGR_CORE_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/toolchain/prof_reporter.h index ce4ae7fa..c734380c 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_reporter.h +++ b/third_party/fwkacllib/inc/toolchain/prof_reporter.h @@ -14,19 +14,19 @@ * limitations under the License. */ +#ifndef MSPROF_ENGINE_PROF_REPORTER_H_ +#define MSPROF_ENGINE_PROF_REPORTER_H_ +#define MSVP_PROF_API __attribute__((visibility("default"))) + /** + * @file prof_reporter.h * @defgroup reporter the reporter group * This is the reporter group */ -#ifndef _MSPROF_ENGINE_PROF_REPORTER_H_ -#define _MSPROF_ENGINE_PROF_REPORTER_H_ -#define MSVP_PROF_API __attribute__((visibility("default"))) namespace Msprof { namespace Engine { - /// the max tag length #define MSPROF_ENGINE_MAX_TAG_LEN (31) - /** * @ingroup reporter * @brief struct ReporterData @@ -86,11 +86,4 @@ class MSVP_PROF_API Reporter { } // namespace Engine } // namespace Msprof -#endif -/* - * History: \n - * 2019-04-10, huawei, Create file. \n - * 2020-02-10, huawei, Add Api Comment. \n - * - * vi: set expandtab ts=4 sw=4 tw=120: - */ \ No newline at end of file +#endif // MSPROF_ENGINE_PROF_REPORTER_H_ \ No newline at end of file diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h index e86ec846..01636253 100644 --- a/third_party/fwkacllib/inc/toolchain/slog.h +++ b/third_party/fwkacllib/inc/toolchain/slog.h @@ -14,12 +14,12 @@ * limitations under the License. */ -#ifndef _D_SYSLOG_H -#define _D_SYSLOG_H +#ifndef D_SYSLOG_H_ +#define D_SYSLOG_H_ #ifdef __cplusplus extern "C" { -#endif +#endif // __cplusplus /** * @ingroup slog @@ -218,7 +218,7 @@ static DCODE g_moduleIdName[] = {SET_MOUDLE_ID_MAP_NAME(SLOG), SET_MOUDLE_ID_MAP_NAME(SIS), SET_MOUDLE_ID_MAP_NAME(HSM), {NULL, -1}}; -#endif +#endif // SET_MOUDLE_ID_MAP_NAME /** * @ingroup slog @@ -352,5 +352,5 @@ void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, c #ifdef __cplusplus } -#endif -#endif /* sys/slog.h */ +#endif // __cplusplus +#endif // D_SYSLOG_H_ diff --git a/third_party/fwkacllib/version.info b/third_party/fwkacllib/version.info index 1ce2d666..f9a8f04c 100644 --- a/third_party/fwkacllib/version.info +++ b/third_party/fwkacllib/version.info @@ -1 +1 @@ -Version=1.60.T51.0.B203 +Version=1.60.T0.0.B888 diff --git a/third_party/prebuild/x86_64/libslog.so b/third_party/prebuild/x86_64/libslog.so index 168c1ac6e8c3f563f01498bf9946348c2f42cc0f..23efeb3f5d58de362e3d560c6132c8f7861ae97e 100755 GIT binary patch