From 43faf5dbf347b2a6a5a8c8572b2cc32679f919a5 Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Tue, 29 Dec 2020 14:29:19 +0800 Subject: [PATCH] Synchronize latest Ascend software suite 29 Dec 2020 --- Third_Party_Open_Source_Software_Notice | 73 ++++ cmake/intf_pub_linux.cmake | 3 +- ge/CMakeLists.txt | 6 + ge/client/ge_api.cc | 19 + ge/common/dump/dump_op.cc | 15 +- ge/common/ge/tbe_plugin_manager.cc | 4 +- ge/common/profiling/ge_profiling.cc | 2 +- ge/common/profiling/profiling_manager.cc | 14 +- ge/common/proto/op_mapping_info.proto | 2 - ge/common/proto/tensorflow/attr_value.proto | 8 + ge/common/proto/tensorflow/function.proto | 8 + ge/common/proto/tensorflow/graph.proto | 8 + ge/common/proto/tensorflow/graph_library.proto | 8 + ge/common/proto/tensorflow/node_def.proto | 8 + ge/common/proto/tensorflow/op_def.proto | 8 + ge/common/proto/tensorflow/resource_handle.proto | 8 + ge/common/proto/tensorflow/tensor.proto | 8 + ge/common/proto/tensorflow/tensor_shape.proto | 8 + ge/common/proto/tensorflow/types.proto | 8 + ge/common/proto/tensorflow/versions.proto | 8 + ge/executor/ge_executor.cc | 121 +++++-- ge/executor/proto/op_mapping_info.proto | 2 - ge/ge_inference.mk | 2 + ge/ge_local_engine/engine/host_cpu_engine.cc | 5 +- ge/ge_runner.mk | 2 + ge/generator/ge_generator.cc | 62 +++- ge/graph/build/memory/graph_mem_assigner.cc | 2 +- ge/graph/build/model_builder.cc | 54 +++ ge/graph/build/model_builder.h | 6 + ge/graph/build/stream_graph_optimizer.cc | 10 +- ge/graph/build/task_generator.cc | 2 +- ge/graph/load/graph_loader.cc | 77 +++- ge/graph/load/graph_loader.h | 6 + ge/graph/load/new_model_manager/data_dumper.cc | 6 - ge/graph/load/new_model_manager/davinci_model.cc | 284 ++++++++------- ge/graph/load/new_model_manager/davinci_model.h | 40 +- ge/graph/load/new_model_manager/model_manager.cc | 274 ++++++++++++-- ge/graph/load/new_model_manager/model_manager.h | 8 +- .../task_info/kernel_task_info.cc | 132 ++++--- .../new_model_manager/task_info/kernel_task_info.h | 2 - ge/graph/manager/graph_manager.cc | 30 +- ge/graph/manager/graph_mem_allocator.cc | 3 + ge/graph/optimize/graph_optimize.cc | 3 +- ge/graph/passes/attach_stream_label_pass.cc | 28 +- ge/graph/passes/attach_stream_label_pass.h | 4 +- ge/graph/passes/base_pass.cc | 2 +- ge/graph/passes/dimension_adjust_pass.cc | 64 ++++ ge/graph/passes/dimension_adjust_pass.h | 4 + .../passes/dynamic_single_op_reset_shape_pass.cc | 12 +- .../passes/dynamic_single_op_reset_shape_pass.h | 2 +- ge/graph/passes/enter_pass.cc | 64 +++- ge/graph/passes/enter_pass.h | 3 +- ge/graph/passes/folding_pass.cc | 5 +- ge/graph/passes/merge_to_stream_merge_pass.cc | 10 - ge/graph/passes/multi_batch_clone_pass.cc | 74 ++-- ge/graph/passes/multi_batch_clone_pass.h | 22 +- ge/graph/passes/next_iteration_pass.cc | 262 +++++--------- ge/graph/passes/next_iteration_pass.h | 16 +- ge/graph/passes/remove_same_const_pass.cc | 106 ++++++ ge/graph/passes/remove_same_const_pass.h | 28 ++ ge/graph/passes/switch_to_stream_switch_pass.cc | 12 +- ge/graph/passes/useless_control_out_remove_pass.cc | 51 +++ ge/graph/passes/useless_control_out_remove_pass.h | 29 ++ ge/graph/preprocess/graph_preprocess.cc | 40 ++ ge/graph/preprocess/multi_batch_copy_graph.cc | 402 ++++++++++++++++++--- ge/graph/preprocess/multi_batch_copy_graph.h | 16 +- ge/host_kernels/dynamic_stitch_kernel.cc | 11 +- ge/hybrid/executor/hybrid_execution_context.h | 2 + ge/hybrid/executor/hybrid_model_executor.cc | 1 + ge/hybrid/executor/node_state.cc | 56 ++- ge/hybrid/executor/node_state.h | 3 +- ge/hybrid/executor/subgraph_executor.cc | 9 +- ge/hybrid/executor/worker/execution_engine.cc | 9 +- .../executor/worker/shape_inference_engine.cc | 121 +------ ge/hybrid/executor/worker/shape_inference_engine.h | 4 - ge/hybrid/executor/worker/task_compile_engine.cc | 3 + ge/hybrid/model/hybrid_model_builder.cc | 26 ++ ge/hybrid/model/hybrid_model_builder.h | 1 + ge/hybrid/model/node_item.cc | 91 ++--- ge/hybrid/model/node_item.h | 5 - ge/hybrid/node_executor/task_context.cc | 22 -- ge/hybrid/node_executor/task_context.h | 2 - ge/ir_build/atc_ir_common.cc | 7 +- ge/ir_build/atc_ir_common.h | 2 +- ge/offline/main.cc | 2 +- ge/proto/caffe/caffe.proto | 8 + ge/proto/op_mapping_info.proto | 2 - ge/proto/tensorflow/attr_value.proto | 8 + ge/proto/tensorflow/function.proto | 8 + ge/proto/tensorflow/graph.proto | 8 + ge/proto/tensorflow/graph_library.proto | 8 + ge/proto/tensorflow/node_def.proto | 8 + ge/proto/tensorflow/op_def.proto | 8 + ge/proto/tensorflow/resource_handle.proto | 8 + ge/proto/tensorflow/tensor.proto | 8 + ge/proto/tensorflow/tensor_shape.proto | 8 + ge/proto/tensorflow/types.proto | 8 + ge/proto/tensorflow/versions.proto | 8 + ge/single_op/task/op_task.cc | 8 +- inc/external/acl/error_codes/rt_error_codes.h | 202 +++++------ inc/external/acl/ops/acl_fv.h | 1 - inc/external/ge/ge_api_types.h | 5 +- inc/external/runtime/rt_error_codes.h | 196 +++++----- inc/framework/common/ge_types.h | 2 - inc/framework/executor/ge_executor.h | 13 + inc/framework/omg/parser/model_parser.h | 3 - metadef/CMakeLists.txt | 6 +- metadef/OWNERS | 2 + metadef/cmake/intf_pub_linux.cmake | 3 +- metadef/graph/option/ge_context.cc | 2 +- metadef/graph/proto/op_mapping_info.proto | 2 - metadef/inc/common/proto/op_mapping_info.proto | 2 - .../inc/common/util/error_manager/error_manager.h | 19 +- metadef/inc/common/util/platform_info.h | 131 ++----- metadef/inc/common/util/platform_infos_def.h | 283 --------------- metadef/inc/graph/debug/ge_attr_define.h | 1 - metadef/inc/register/proto/op_mapping_info.proto | 2 - .../graphengine/inc/external/ge/ge_api_types.h | 10 +- .../inc/framework/omg/parser/model_parser.h | 12 + third_party/fwkacllib/inc/cce/aicpu_engine.h | 1 - third_party/fwkacllib/inc/hccl/hcom.h | 128 ++----- .../fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h | 4 +- third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h | 1 + third_party/fwkacllib/inc/ops/aipp.h | 4 +- third_party/fwkacllib/inc/ops/all_ops.h | 2 +- third_party/fwkacllib/inc/ops/array_ops.h | 73 +++- third_party/fwkacllib/inc/ops/audio_ops.h | 2 +- third_party/fwkacllib/inc/ops/batch_ops.h | 2 +- third_party/fwkacllib/inc/ops/bitwise_ops.h | 2 +- third_party/fwkacllib/inc/ops/boosted_trees_ops.h | 2 +- .../fwkacllib/inc/ops/candidate_sampling_ops.h | 2 +- third_party/fwkacllib/inc/ops/condtake_ops.h | 2 +- third_party/fwkacllib/inc/ops/control_flow_ops.h | 2 +- third_party/fwkacllib/inc/ops/ctc_ops.h | 2 +- third_party/fwkacllib/inc/ops/data_flow_ops.h | 2 +- .../fwkacllib/inc/ops/elewise_calculation_ops.h | 284 ++++++++++++++- third_party/fwkacllib/inc/ops/functional_ops.h | 2 +- third_party/fwkacllib/inc/ops/get_data_ops.h | 2 +- third_party/fwkacllib/inc/ops/hcom_ops.h | 47 ++- third_party/fwkacllib/inc/ops/hvd_ops.h | 2 +- third_party/fwkacllib/inc/ops/image_ops.h | 131 +++++-- third_party/fwkacllib/inc/ops/internal_ops.h | 2 +- third_party/fwkacllib/inc/ops/linalg_ops.h | 2 +- third_party/fwkacllib/inc/ops/logging_ops.h | 2 +- third_party/fwkacllib/inc/ops/lookup_ops.h | 2 +- third_party/fwkacllib/inc/ops/math_ops.h | 65 +++- .../fwkacllib/inc/ops/matrix_calculation_ops.h | 57 ++- third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h | 2 +- third_party/fwkacllib/inc/ops/nn_calculation_ops.h | 126 ++++--- third_party/fwkacllib/inc/ops/nn_detect_ops.h | 10 +- third_party/fwkacllib/inc/ops/nn_norm_ops.h | 237 +++++++++++- third_party/fwkacllib/inc/ops/nn_ops.h | 2 +- third_party/fwkacllib/inc/ops/nn_pooling_ops.h | 225 +++++++++++- third_party/fwkacllib/inc/ops/nn_training_ops.h | 2 +- third_party/fwkacllib/inc/ops/no_op.h | 2 +- third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h | 204 ++++++++++- third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h | 2 +- third_party/fwkacllib/inc/ops/outfeed_ops.h | 2 +- third_party/fwkacllib/inc/ops/pad_ops.h | 56 ++- third_party/fwkacllib/inc/ops/parsing_ops.h | 2 +- third_party/fwkacllib/inc/ops/quantize_ops.h | 2 +- third_party/fwkacllib/inc/ops/ragged_array_ops.h | 2 +- .../fwkacllib/inc/ops/ragged_conversion_ops.h | 2 +- third_party/fwkacllib/inc/ops/ragged_math_ops.h | 2 +- third_party/fwkacllib/inc/ops/random_ops.h | 56 ++- third_party/fwkacllib/inc/ops/reduce_ops.h | 47 ++- .../fwkacllib/inc/ops/resource_variable_ops.h | 2 +- third_party/fwkacllib/inc/ops/rnn.h | 181 +++++++++- third_party/fwkacllib/inc/ops/rpn_ops.h | 2 +- third_party/fwkacllib/inc/ops/save_ops.h | 2 +- third_party/fwkacllib/inc/ops/sdca_ops.h | 2 +- third_party/fwkacllib/inc/ops/selection_ops.h | 184 +++++++++- third_party/fwkacllib/inc/ops/set_ops.h | 2 +- third_party/fwkacllib/inc/ops/sparse_ops.h | 2 +- third_party/fwkacllib/inc/ops/spectral_ops.h | 2 +- .../fwkacllib/inc/ops/split_combination_ops.h | 2 +- third_party/fwkacllib/inc/ops/state_ops.h | 2 +- .../fwkacllib/inc/ops/stateful_random_ops.h | 2 +- .../fwkacllib/inc/ops/stateless_random_ops.h | 2 +- third_party/fwkacllib/inc/ops/string_ops.h | 2 +- third_party/fwkacllib/inc/ops/swap_co_ops.h | 2 +- .../fwkacllib/inc/ops/target_crop_and_resize.h | 2 +- third_party/fwkacllib/inc/ops/transformation_ops.h | 11 +- .../fwkacllib/inc/ops/warp_perspective_ops.h | 2 +- third_party/fwkacllib/inc/runtime/base.h | 10 +- third_party/fwkacllib/inc/runtime/config.h | 10 +- third_party/fwkacllib/inc/runtime/context.h | 10 +- third_party/fwkacllib/inc/runtime/dev.h | 10 +- third_party/fwkacllib/inc/runtime/dvfsprofile.h | 10 +- third_party/fwkacllib/inc/runtime/event.h | 10 +- third_party/fwkacllib/inc/runtime/kernel.h | 10 +- third_party/fwkacllib/inc/runtime/mem.h | 10 +- third_party/fwkacllib/inc/runtime/rt.h | 10 +- third_party/fwkacllib/inc/runtime/rt_model.h | 10 +- third_party/fwkacllib/inc/runtime/stream.h | 11 +- third_party/fwkacllib/inc/tdt/index_transform.h | 20 +- third_party/fwkacllib/inc/toolchain/prof_acl_api.h | 12 +- .../fwkacllib/inc/toolchain/prof_callback.h | 5 +- .../fwkacllib/inc/toolchain/tuning_tool/tune_api.h | 18 +- 199 files changed, 4365 insertions(+), 2014 deletions(-) create mode 100644 ge/graph/passes/remove_same_const_pass.cc create mode 100644 ge/graph/passes/remove_same_const_pass.h create mode 100644 ge/graph/passes/useless_control_out_remove_pass.cc create mode 100644 ge/graph/passes/useless_control_out_remove_pass.h delete mode 100644 metadef/inc/common/util/platform_infos_def.h diff --git a/Third_Party_Open_Source_Software_Notice b/Third_Party_Open_Source_Software_Notice index 0d79cfa5..ba8da1fb 100644 --- a/Third_Party_Open_Source_Software_Notice +++ b/Third_Party_Open_Source_Software_Notice @@ -458,3 +458,76 @@ Copyright (c) Facebook Inc. and Microsoft Corporation. License: MIT License Please see above. + + + +Software: caffe 1.0 + +License: BSD 2-Clause License + +Open Source Software Licensed Under the BSD 2-Clause License + +GraphEngine uses source code files from caffe so as to support model format conversion from caffe model to GraphEngine model. +Please see below for the full list of source code files from caffe that are used by GraphEngine. +The below software in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. +---------------------------------------------------------------------------------------- +1. caffe.proto master +All contributions by the University of California: +Copyright (c) 2014-2017 The Regents of the University of California (Regents) +All rights reserved. + + +Terms of the BSD 2-Clause License: +-------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + +Software: tensorflow 1.15.0 + +License: Apache-2.0 License + +Open Source Software Licensed Under the Apache-2.0 License + + +GraphEngine uses source code files from tensorflow so as to support model format conversion from tensorflow model to GraphEngine model. +Please see below for the full list of source code files from tensorflow that are used by GraphEngine. +The below software in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. +---------------------------------------------------------------------------------------- +1. attr_value.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +2. function.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +3. graph.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +4. node_def.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +5. op_def.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +6. resource_handle.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +7. tensor.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +8. tensor_shape.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +9. types.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +10. versions.proto master +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Terms of the Apache-2.0 License: +Please see above. diff --git a/cmake/intf_pub_linux.cmake b/cmake/intf_pub_linux.cmake index 40c6bca9..380de1e1 100755 --- a/cmake/intf_pub_linux.cmake +++ b/cmake/intf_pub_linux.cmake @@ -4,7 +4,7 @@ endif() add_library(intf_pub INTERFACE) -target_compile_options(intf_pub INTERFACE +target_compile_options(intf_pub INTERFACE -Wall -fPIC $,-fstack-protector-all,-fstack-protector-strong> @@ -16,6 +16,7 @@ target_compile_definitions(intf_pub INTERFACE $<$:CFG_BUILD_DEBUG> WIN64=1 LINUX=0 + LOG_CPP ) target_link_options(intf_pub INTERFACE -Wl,-z,relro diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 3a0f7638..88d74730 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -157,6 +157,8 @@ set(TRAIN_SRC_LIST "graph/passes/compile_nodes_pass.cc" "graph/passes/constant_folding_pass.cc" "graph/passes/constant_fuse_same_pass.cc" + "graph/passes/remove_same_const_pass.cc" + "graph/passes/useless_control_out_remove_pass.cc" "graph/passes/control_trigger_pass.cc" "graph/passes/dimension_adjust_pass.cc" "graph/passes/dimension_compute_pass.cc" @@ -522,6 +524,8 @@ set(INFER_SRC_LIST "graph/passes/assign_pass.cc" "graph/passes/addn_pass.cc" "graph/passes/common_subexpression_elimination_pass.cc" + "graph/passes/remove_same_const_pass.cc" + "graph/passes/useless_control_out_remove_pass.cc" "graph/passes/transop_symmetry_elimination_pass.cc" "graph/passes/save_pass.cc" "graph/passes/switch_dead_branch_elimination.cc" @@ -616,6 +620,7 @@ target_compile_definitions(ge_runner PRIVATE FMK_SUPPORT_DUMP DAVINCI_CLOUD google=ascend_private + $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(ge_runner PRIVATE @@ -683,6 +688,7 @@ target_compile_definitions(ge_compiler PRIVATE FMK_HOST_INFER COMPILE_OMG_PACKAGE google=ascend_private + $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(ge_compiler PRIVATE diff --git a/ge/client/ge_api.cc b/ge/client/ge_api.cc index 66958310..75cc92d4 100644 --- a/ge/client/ge_api.cc +++ b/ge/client/ge_api.cc @@ -32,6 +32,9 @@ #include "graph/common/ge_call_wrapper.h" #include "register/op_registry.h" #include "common/ge/tbe_plugin_manager.h" +#ifndef ONLY_COMPILE_OPEN_SRC +#include "toolchain/plog.h" +#endif using domi::OpRegistry; using std::map; @@ -129,6 +132,11 @@ Status GEInitializeImpl(const std::map &options) { // Initialize GE, prepare for execution, call GELib::Initialize Status GEInitialize(const std::map &options) { +#ifndef ONLY_COMPILE_OPEN_SRC + if (DlogReportInitialize() != SUCCESS) { + GELOGW("Dlog report device log initialize failed."); + } +#endif return GEInitializeImpl(options); } @@ -143,6 +151,11 @@ Status GEInitialize(const std::map &options) { std::string val = option.second.GetString(); str_options[key] = val; } +#ifndef ONLY_COMPILE_OPEN_SRC + if (DlogReportInitialize() != SUCCESS) { + GELOGW("Dlog report device log initialize failed."); + } +#endif return GEInitializeImpl(str_options); } @@ -187,6 +200,12 @@ Status GEFinalize() { // to avoid memory fragment, use malloc_trim to back free stack to system malloc_trim(0); +#ifndef ONLY_COMPILE_OPEN_SRC + if (DlogReportFinalize() != SUCCESS) { + GELOGW("Dlog report device log finalize failed."); + } +#endif + GELOGT(TRACE_STOP, "GEFinalize finished"); return ret; } diff --git a/ge/common/dump/dump_op.cc b/ge/common/dump/dump_op.cc index 0b9e9dcc..e92ada05 100644 --- a/ge/common/dump/dump_op.cc +++ b/ge/common/dump/dump_op.cc @@ -94,9 +94,6 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) { for (auto dim : output_descs.at(i).GetShape().GetDims()) { output.mutable_shape()->add_dim(dim); } - for (auto dim : output_descs.at(i).GetOriginShape().GetDims()) { - output.mutable_origin_shape()->add_dim(dim); - } int64_t output_size = 0; if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { GELOGE(PARAM_INVALID, "Get output size filed"); @@ -121,9 +118,6 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) { for (auto dim : input_descs.at(i).GetShape().GetDims()) { input.mutable_shape()->add_dim(dim); } - for (auto dim : input_descs.at(i).GetOriginShape().GetDims()) { - input.mutable_origin_shape()->add_dim(dim); - } int64_t input_size = 0; if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { GELOGE(PARAM_INVALID, "Get output size filed"); @@ -220,15 +214,8 @@ Status DumpOp::LaunchDumpOp() { SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(), dump_path.c_str()); - uint32_t task_id = 0; - uint32_t stream_id = 0; - rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGW("call rtGetTaskIdAndStreamID failed, ret = 0x%X", rt_ret); - } + aicpu::dump::Task task; - task.set_task_id(task_id); - task.set_stream_id(stream_id); task.mutable_op()->set_op_name(op_desc_->GetName()); task.mutable_op()->set_op_type(op_desc_->GetType()); if (dump_properties_.GetDumpMode() == kDumpOutput) { diff --git a/ge/common/ge/tbe_plugin_manager.cc b/ge/common/ge/tbe_plugin_manager.cc index 0cc7d553..e3d2282a 100644 --- a/ge/common/ge/tbe_plugin_manager.cc +++ b/ge/common/ge/tbe_plugin_manager.cc @@ -184,7 +184,7 @@ void TBEPluginManager::LoadCustomOpLib() { std::string fmk_type = std::to_string(domi::TENSORFLOW); auto it = options_.find(ge::FRAMEWORK_TYPE); if (it != options_.end()) { - fmk_type = it->second; + fmk_type = it->second; } std::vector registration_datas = domi::OpRegistry::Instance()->registrationDatas; GELOGI("The size of registration_datas is: %zu", registration_datas.size()); @@ -192,7 +192,7 @@ void TBEPluginManager::LoadCustomOpLib() { if (std::to_string(reg_data.GetFrameworkType()) == fmk_type) { GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(), TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str()); - (void)domi::OpRegistry::Instance()->Register(reg_data); + domi::OpRegistry::Instance()->Register(reg_data); } } } diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc index 43ed6434..d6740d8b 100644 --- a/ge/common/profiling/ge_profiling.cc +++ b/ge/common/profiling/ge_profiling.cc @@ -182,7 +182,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le command.module_index = prof_config_param->profSwitch; } GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(), - command.module_index); + command.module_index); if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str()); } diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 994b3eac..86ae7dca 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -38,8 +38,10 @@ const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; } // namespace namespace ge { -ProfilingManager::ProfilingManager() - : is_load_profiling_(false), is_execute_profiling_(false), is_training_trace_(false), subscribe_count_(0) { +ProfilingManager::ProfilingManager() : is_load_profiling_(false), + is_execute_profiling_(false), + is_training_trace_(false), + subscribe_count_(0) { prof_cb_.msprofCtrlCallback = nullptr; prof_cb_.msprofReporterCallback = nullptr; } @@ -100,8 +102,8 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt return INTERNAL_ERROR; } is_execute_profiling_ = true; - GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), prof_conf.options, - options.profiling_options.c_str()); + GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), + prof_conf.options, options.profiling_options.c_str()); } else { (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH); (void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX); @@ -141,9 +143,6 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) { } try { Json prof_options = Json::parse(options); - if (options.find(kTrainingTrace) == std::string::npos) { - return ge::SUCCESS; - } const std::string training_trace = prof_options[kTrainingTrace]; if (training_trace.empty()) { GELOGI("Training trace will not take effect."); @@ -842,7 +841,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP return; } } - return; } diff --git a/ge/common/proto/op_mapping_info.proto b/ge/common/proto/op_mapping_info.proto index 7fb6f84b..e23b7ebe 100644 --- a/ge/common/proto/op_mapping_info.proto +++ b/ge/common/proto/op_mapping_info.proto @@ -15,7 +15,6 @@ message Output { int32 original_output_data_type = 7; int32 original_output_format = 8; uint64 size = 9; - Shape origin_shape = 10; } message Input { @@ -24,7 +23,6 @@ message Input { Shape shape = 3; uint64 address = 4; uint64 size = 5; - Shape origin_shape = 6; } enum BufferType { diff --git a/ge/common/proto/tensorflow/attr_value.proto b/ge/common/proto/tensorflow/attr_value.proto index 1cc67d62..438d7163 100644 --- a/ge/common/proto/tensorflow/attr_value.proto +++ b/ge/common/proto/tensorflow/attr_value.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/function.proto b/ge/common/proto/tensorflow/function.proto index 075897c6..44681e32 100644 --- a/ge/common/proto/tensorflow/function.proto +++ b/ge/common/proto/tensorflow/function.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/graph.proto b/ge/common/proto/tensorflow/graph.proto index d639a7d6..73bfc6ee 100644 --- a/ge/common/proto/tensorflow/graph.proto +++ b/ge/common/proto/tensorflow/graph.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/graph_library.proto b/ge/common/proto/tensorflow/graph_library.proto index e393d38d..7bca0838 100644 --- a/ge/common/proto/tensorflow/graph_library.proto +++ b/ge/common/proto/tensorflow/graph_library.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/node_def.proto b/ge/common/proto/tensorflow/node_def.proto index b9bc97ee..50cf5cac 100644 --- a/ge/common/proto/tensorflow/node_def.proto +++ b/ge/common/proto/tensorflow/node_def.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/op_def.proto b/ge/common/proto/tensorflow/op_def.proto index 3485d045..7f0e8ce2 100644 --- a/ge/common/proto/tensorflow/op_def.proto +++ b/ge/common/proto/tensorflow/op_def.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/resource_handle.proto b/ge/common/proto/tensorflow/resource_handle.proto index a3452351..91c46c9a 100644 --- a/ge/common/proto/tensorflow/resource_handle.proto +++ b/ge/common/proto/tensorflow/resource_handle.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/tensor.proto b/ge/common/proto/tensorflow/tensor.proto index d0a4d024..48eeb6c4 100644 --- a/ge/common/proto/tensorflow/tensor.proto +++ b/ge/common/proto/tensorflow/tensor.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/tensor_shape.proto b/ge/common/proto/tensorflow/tensor_shape.proto index 4225a2e3..3a6d8c5a 100644 --- a/ge/common/proto/tensorflow/tensor_shape.proto +++ b/ge/common/proto/tensorflow/tensor_shape.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + // Protocol buffer representing the shape of tensors. syntax = "proto3"; diff --git a/ge/common/proto/tensorflow/types.proto b/ge/common/proto/tensorflow/types.proto index ba7a72b3..f40e49cb 100644 --- a/ge/common/proto/tensorflow/types.proto +++ b/ge/common/proto/tensorflow/types.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/common/proto/tensorflow/versions.proto b/ge/common/proto/tensorflow/versions.proto index 48061218..4e81548f 100644 --- a/ge/common/proto/tensorflow/versions.proto +++ b/ge/common/proto/tensorflow/versions.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 18d78696..923cac89 100644 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -209,33 +209,19 @@ bool IsDynmaicDimsSizeMatchModel(const vector cur_dynamic_dims, namespace ge { bool GeExecutor::isInit_ = false; - -static void InitOpsProtoManger() { - string opsproto_path; - const char *path_env = std::getenv("ASCEND_OPP_PATH"); - if (path_env != nullptr) { - string path = path_env; - string file_path = RealPath(path.c_str()); - if (file_path.empty()) { - GELOGE(FAILED, "File path %s is invalid.", path.c_str()); - return; +class ModelListenerAdapter : public ModelListener { + public: + domi::Status OnComputeDone(uint32_t model_id, uint32_t dataIndex, uint32_t resultCode, + std::vector &outputs) { + if (listener == nullptr) { + GELOGE(ge::FAILED, "listener is null."); + return FAILED; } - opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/"); - GELOGI("Get opsproto so path from env : %s", path.c_str()); - } else { - string path_base = PluginManager::GetPath(); - GELOGI("path_base is %s", path_base.c_str()); - path_base = path_base.substr(0, path_base.rfind('/')); - path_base = path_base.substr(0, path_base.rfind('/') + 1); - opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); - } - - GELOGI("Get opsproto path is %s", opsproto_path.c_str()); - OpsProtoManager *manager = OpsProtoManager::Instance(); - map option_tmp; - option_tmp.emplace(std::pair(string("ge.opsProtoLibPath"), opsproto_path)); - (void)manager->Initialize(option_tmp); -} + return listener->OnComputeDone(model_id, dataIndex, resultCode, outputs); + } + + std::shared_ptr listener; +}; GeExecutor::GeExecutor() {} @@ -246,16 +232,6 @@ Status GeExecutor::Initialize() { return ge::SUCCESS; } - OpTilingManager::GetInstance().LoadSo(); - - Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize(); - if (initHostCpuEngineStatus != SUCCESS) { - GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine"); - return initHostCpuEngineStatus; - } - - InitOpsProtoManger(); - std::vector mem_type(1, RT_MEMORY_HBM); mem_type.push_back(RT_MEMORY_P2P_DDR); auto ret = MemManager::Instance().Initialize(mem_type); @@ -560,6 +536,60 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add return SUCCESS; } +// Load model +Status GeExecutor::LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key, + int32_t priority, std::shared_ptr listener) { + GELOGI("load model offline begin."); + if (!isInit_) { + GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); + return ACL_ERROR_GE_EXEC_NOT_INIT; + } + + string filePath = RealPath(path.c_str()); + if (filePath.empty()) { + GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, + "File path is invalid. please check your text file '%s'.", path.c_str()); + return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; + } + + std::shared_ptr listener_adapter = MakeShared(); + if (listener_adapter == nullptr) { + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!"); + return ACL_ERROR_GE_MEMORY_ALLOCATION; + } + listener_adapter->listener = listener; + + Status ret = GraphLoader::LoadModelFromFile(path, key, priority, listener_adapter, model_id); + if (ret != SUCCESS) { + GELOGE(ret, "[GeExecutor] LoadModelFromFile failed"); + return ACL_ERROR_GE_LOAD_MODEL; + } + return SUCCESS; +} + +Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data, + std::shared_ptr listener) { + GELOGI("Load model begin."); + if (!isInit_) { + GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); + return ACL_ERROR_GE_EXEC_NOT_INIT; + } + + std::shared_ptr listener_adapter = MakeShared(); + if (listener_adapter == nullptr) { + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!"); + return ACL_ERROR_GE_MEMORY_ALLOCATION; + } + listener_adapter->listener = listener; + + Status ret = GraphLoader::LoadModel(model_data, listener_adapter, model_id); + if (ret != SUCCESS) { + GELOGE(ret, "[GeExecutor] LoadModel failed."); + return ACL_ERROR_GE_LOAD_MODEL; + } + return ret; +} + Status GeExecutor::UnloadModel(uint32_t model_id) { GELOGD("unload model %u begin.", model_id); if (!isInit_) { @@ -569,7 +599,7 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id); if (ret != SUCCESS) { GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id); - return ACL_ERROR_GE_INTERNAL_ERROR; + return ret; } std::shared_ptr hybrid_davinci_model = @@ -587,11 +617,26 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { ret = GraphLoader::UnloadModel(model_id); if (ret != SUCCESS) { GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id); - return ACL_ERROR_GE_UNLOAD_MODEL; + return ret; } return SUCCESS; } +Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) { + GELOGI("run model begin."); + if (!isInit_) { + GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); + return ACL_ERROR_GE_EXEC_NOT_INIT; + } + + InputData inputs; + GetDomiInputData(input_data, inputs); + OutputData outputs; + GetDomiOutputData(output_data, outputs); + + return GraphExecutor::DataInput(inputs, outputs); +} + // Get input and output descriptor Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector &input_desc, std::vector &output_desc, bool new_model_desc) { diff --git a/ge/executor/proto/op_mapping_info.proto b/ge/executor/proto/op_mapping_info.proto index 7fb6f84b..e23b7ebe 100644 --- a/ge/executor/proto/op_mapping_info.proto +++ b/ge/executor/proto/op_mapping_info.proto @@ -15,7 +15,6 @@ message Output { int32 original_output_data_type = 7; int32 original_output_format = 8; uint64 size = 9; - Shape origin_shape = 10; } message Input { @@ -24,7 +23,6 @@ message Input { Shape shape = 3; uint64 address = 4; uint64 size = 5; - Shape origin_shape = 6; } enum BufferType { diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index bfb612ea..e20456d5 100644 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -191,6 +191,8 @@ OMG_HOST_SRC_FILES := \ graph/passes/control_trigger_pass.cc \ graph/passes/cond_pass.cc \ graph/passes/cond_remove_pass.cc \ + graph/passes/remove_same_const_pass.cc \ + graph/passes/useless_control_out_remove_pass.cc \ graph/passes/for_pass.cc \ graph/passes/enter_pass.cc \ graph/passes/assign_pass.cc \ diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc index e17f73de..c836d4d6 100644 --- a/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -39,7 +39,7 @@ namespace { } \ ge_tensor = MakeShared(out_desc); \ GE_CHECK_NOTNULL(ge_tensor); \ - GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\ + GELOGI("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\ if (ge_tensor->SetData(reinterpret_cast(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \ GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \ return MEMALLOC_FAILED; \ @@ -50,7 +50,8 @@ namespace { } else { \ ge_tensor = outputs[i]; \ GE_CHECK_NOTNULL(ge_tensor); \ - GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \ + GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, \ + reinterpret_cast(ge_tensor->GetData().data()), ge_tensor->GetData().size()); \ } \ auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ auto tensor_name = op_desc->GetOutputNameByIndex(i); \ diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 25718e9b..9706dadb 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -126,6 +126,8 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/compile_nodes_pass.cc \ graph/passes/constant_folding_pass.cc \ graph/passes/constant_fuse_same_pass.cc \ + graph/passes/remove_same_const_pass.cc \ + graph/passes/useless_control_out_remove_pass.cc \ graph/passes/control_trigger_pass.cc \ graph/passes/dimension_adjust_pass.cc \ graph/passes/dimension_compute_pass.cc \ diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index acb029e9..2911614b 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -262,16 +262,26 @@ static Status CheckShapeReset(const OpDescPtr &op_desc, bool &change_shape_flag) change_shape_flag = true; } } + for (size_t i = 0; i < op_desc->GetAllOutputsDesc().size(); i++) { + auto output_desc = op_desc->MutableOutputDesc(static_cast(i)); + GE_CHECK_NOTNULL(output_desc); + // pass scalar output desc + auto dims = output_desc->GetShape().GetDims(); + if (dims.size() == kDynamicDimSize && dims[0] == kDynamicDimValue) { + change_shape_flag = true; + } + } return SUCCESS; } -static void ResetTensorVecShape(const vector &inputs, vector &inputs_dynamic) { +static Status ResetTensorVecShape(const vector &inputs, vector &inputs_dynamic) { for (auto input : inputs) { auto input_desc = input.GetTensorDesc(); GeShape shape_ori = input_desc.GetShape(); std::vector dynamic_shape_dims = {kDynamicDimValue}; GeShape dynamic_shape(dynamic_shape_dims); + std::vector> dynamic_shape_range; ge::GeTensor inputTensor; ge::GeTensorDesc desc(input_desc); @@ -279,12 +289,20 @@ static void ResetTensorVecShape(const vector &inputs, vector bool is_const = false; (void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const); if (!is_const && shape_ori.GetDims().size() > 0) { + int64_t storage_format = FORMAT_NCHW; + if (ge::AttrUtils::GetInt(desc, ge::ATTR_NAME_STORAGE_FORMAT, storage_format) && + !ge::AttrUtils::SetListInt(desc, ge::ATTR_NAME_STORAGE_SHAPE, dynamic_shape_dims)) { + GELOGE(FAILED, "Set attr ATTR_NAME_STORAGE_SHAPE fail."); + return FAILED; + } desc.SetShape(dynamic_shape); + desc.SetShapeRange(dynamic_shape_range); } inputTensor.SetTensorDesc(desc); inputs_dynamic.push_back(inputTensor); } + return SUCCESS; } class GeGenerator::Impl { @@ -528,6 +546,24 @@ bool GeGenerator::Impl::SetOppVersionInfo(AttrHolder &obj) { return true; } +static Status SetModelNameForDump(GeRootModelPtr ge_root_model) { + ModelHelper model_helper; + string model_name = ""; + GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); + Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), + model_name); + if (name_ret != SUCCESS) { + ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"}); + GELOGE(FAILED, "Get model_name failed. Param --output is invalid."); + return PARAM_INVALID; + } + map name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); + GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; + GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null"); + ge_model->SetName(model_name); + return SUCCESS; +} + Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector &inputs, ModelBufferData &model, bool is_offline) { rtContext_t ctx = nullptr; @@ -536,7 +572,6 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr GELOGD("Current ctx is null."); ctx = nullptr; } - GeRootModelPtr ge_root_model = nullptr; GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); impl_->is_offline_ = is_offline; @@ -560,22 +595,11 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr impl_->build_step_.c_str()); return SUCCESS; } - GE_CHECK_NOTNULL(ge_root_model); - GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); - ModelHelper model_helper; - string model_name = ""; - Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), - model_name); - if (name_ret != SUCCESS) { - ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"}); - GELOGE(FAILED, "Get model_name failed. Param --output is invalid."); - return PARAM_INVALID; + ret = SetModelNameForDump(ge_root_model); + if (ret != SUCCESS) { + return ret; } - map name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); - GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; - GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null"); - ge_model->SetName(model_name); ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model); if (ret != SUCCESS) { GELOGE(ret, "Save model failed"); @@ -584,11 +608,9 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr } return ret; } - if (ctx != nullptr) { (void)rtCtxSetCurrent(ctx); } - return SUCCESS; } @@ -682,8 +704,8 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in if (CheckShapeReset(op_desc, dynamic_flag) == SUCCESS && dynamic_flag) { vector inputs_dynamic; vector outputs_dynamic; - ResetTensorVecShape(inputs, inputs_dynamic); - ResetTensorVecShape(outputs, outputs_dynamic); + GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(inputs, inputs_dynamic)); + GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(outputs, outputs_dynamic)); GE_CHK_STATUS_RET_NOLOG( impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic)); } else { diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index 40b3b9dc..16d5d38f 100644 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -99,7 +99,7 @@ Status GraphMemoryAssigner::AssignMemory() { MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset()); memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); - if (mem_assigner->GetP2PMemOffset() >= 0) { + if (mem_assigner->GetP2PMemOffset() > 0) { MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset()); memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset); } diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 37eb499a..77f8f237 100644 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -224,6 +224,7 @@ Status ModelBuilder::AdjustConstWeightSize(const ge::NodePtr &node, size_t &mem_ GeTensorDesc &tensor_desc = weight->MutableTensorDesc(); size_t output_size = weight->GetData().size(); TensorUtils::SetDataOffset(tensor_desc, mem_offset); + GELOGD("Node: %s, weight size: %zu.", node->GetName().c_str(), output_size); mem_offset += output_size; } return SUCCESS; @@ -581,9 +582,13 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { // Add TBE Kernels and custom aicpu op bin std::set tbe_name_set; std::set aicpu_name_set; + std::set aicpu_op_types; + std::set aicpu_tf_op_types; for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); + // check aicpu op type + CollectCheckAicpuAttr(node_op_desc, aicpu_op_types, aicpu_tf_op_types); TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); if (tbe_kernel == nullptr) { std::string kernel_name; @@ -605,6 +610,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { tbe_kernel_store_.AddTBEKernel(tbe_kernel); } + SetModelCheckAicpuAttr(model, aicpu_op_types, aicpu_tf_op_types); + for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); @@ -796,4 +803,51 @@ Status ModelBuilder::CompileSingleOp() { GE_TIMESTAMP_CALLNUM_END(BatchCompileOp, "GraphBuild::CompileOp"); return ge::SUCCESS; } + +void ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set &aicpu_op_types, + std::set &aicpu_tf_op_types) { + std::string aicpu_optype; + bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype); + std::vector tf_optypes; + bool has_attr_check_tf = ge::AttrUtils::GetListStr(op_desc, "needCheckTf", tf_optypes); + if (has_attr_check_cpu && !aicpu_optype.empty()) { + aicpu_op_types.insert(aicpu_optype); + } + + if (has_attr_check_tf && !tf_optypes.empty()) { + aicpu_tf_op_types.insert(tf_optypes.begin(), tf_optypes.end()); + } + + return; +} + +void ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::set &aicpu_op_types, + std::set &aicpu_tf_op_types) { + std::vector aicpu_optype_list; + std::vector aicpu_tf_optype_list; + if (ge::AttrUtils::GetListStr(&model, "needCheckCpu", aicpu_optype_list)) { + GELOGI("Already have aicpu optype size: %zu", aicpu_optype_list.size()); + aicpu_op_types.insert(aicpu_optype_list.begin(), aicpu_optype_list.end()); + } + + if (ge::AttrUtils::GetListStr(&model, "needCheckTf", aicpu_tf_optype_list)) { + GELOGI("Already have aicpu tf optype size: %zu", aicpu_tf_optype_list.size()); + aicpu_tf_op_types.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end()); + } + + // reset list with set + aicpu_optype_list.assign(aicpu_op_types.begin(), aicpu_op_types.end()); + aicpu_tf_optype_list.assign(aicpu_tf_op_types.begin(), aicpu_tf_op_types.end()); + GELOGI( + "Check Aicpu op types ComputeGraph: %s aicpu_op_types: %zu, aicpu_optype_list: %zu, aicpu_tf_op_types: %zu, " + "aicpu_tf_optype_list:%zu.", + compute_graph_->GetName().c_str(), aicpu_op_types.size(), aicpu_optype_list.size(), aicpu_tf_op_types.size(), + aicpu_tf_optype_list.size()); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return, + "Set attr needCheckCpu fail."); + + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckTf", aicpu_tf_optype_list), return, + "Set attr needCheckTf fail."); + return; +} } // namespace ge diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h index e75521c7..de079768 100644 --- a/ge/graph/build/model_builder.h +++ b/ge/graph/build/model_builder.h @@ -83,6 +83,12 @@ class ModelBuilder { Status CompileSingleOp(); + void CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set &aicpu_op_types, + std::set &aicpu_tf_op_types); + + void SetModelCheckAicpuAttr(ge::Model &model, std::set &aicpu_op_types, + std::set &aicpu_tf_op_types); + uint64_t session_id_; map mem_type_to_mem_offset_; diff --git a/ge/graph/build/stream_graph_optimizer.cc b/ge/graph/build/stream_graph_optimizer.cc index 5741e6b3..ea0a7698 100644 --- a/ge/graph/build/stream_graph_optimizer.cc +++ b/ge/graph/build/stream_graph_optimizer.cc @@ -66,13 +66,13 @@ bool StreamGraphOptimizer::IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &com if (AttrUtils::GetStr(cur_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { label_set.insert(batch_label); } else { - GELOGD("Node %s[%s] has no batch label, subgraph %s, stream id: %ld", cur_node->GetName().c_str(), + GELOGD("Node %s[%s] has no batch_label, subgraph %s, stream id: %ld ", cur_node->GetName().c_str(), cur_node->GetType().c_str(), comp_graph->GetName().c_str(), stream_id); continue; } - GELOGD("Node %s in subgraph %s stream id: %ld, node num: %zu", cur_node->GetName().c_str(), - comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize()); + GELOGD("Node %s in subgraph %s stream id: %ld, batch_label: %s, node num: %zu", cur_node->GetName().c_str(), + comp_graph->GetName().c_str(), stream_id, batch_label.c_str(), comp_graph->GetDirectNodesSize()); } if (stream_set.size() > 1 || label_set.size() > 1) { GELOGI("Nodes of graph: %s have different stream id or batch_label, node num: %zu, different stream num: %zu.", @@ -126,12 +126,14 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com run_context.graphStreamList.size()); return FAILED; } + run_context.stream = run_context.graphStreamList[stream_id]; std::string batch_label; (void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label); GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu, " - "batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id, + "batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id, static_cast(reinterpret_cast(run_context.stream)), batch_label.c_str()); + for (auto iter = graph_optimizers.begin(); iter != graph_optimizers.end(); ++iter) { GE_CHECK_NOTNULL(*iter); Status ret = (*iter)->OptimizeStreamGraph(*subgraph, run_context); diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index 2089ad31..7e45ad61 100644 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -54,7 +54,7 @@ const uint64_t kProfilingFpStartLogid = 1; const uint64_t kProfilingBpEndLogid = 2; const uint64_t kProfilingArStartLogid = 3; const uint64_t kProfilingArEndLogid = 4; -const uint64_t kProfilingIterEndLogid = 255; +const uint64_t kProfilingIterEndLogid = 65535; const int64_t kHashFactor = 100000; const int64_t kInvalidGroupId = -1; } // namespace diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index cb68533e..44556422 100644 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -122,14 +122,14 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string ModelData &model_data) { Status ret; if (!CheckInputPathValid(path)) { - GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str()); - return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; + GELOGE(GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str()); + return GE_EXEC_MODEL_PATH_INVALID; } GELOGI("Load model begin, model path is: %s", path.c_str()); if (!key_path.empty() && !CheckInputPathValid(key_path)) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str()); - return ACL_ERROR_GE_PARAM_INVALID; + GELOGE(GE_EXEC_MODEL_KEY_PATH_INVALID, "decrypt_key path is invalid: %s", key_path.c_str()); + return GE_EXEC_MODEL_KEY_PATH_INVALID; } ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data); @@ -144,6 +144,63 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string return SUCCESS; } +Status GraphLoader::LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority, + const std::shared_ptr &listener, uint32_t &model_id) { + Status ret; + ModelData model_data; + ret = LoadDataFromFile(path, key_path, priority, model_data); + if (ret != SUCCESS) { + GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret); + if (model_data.model_data != nullptr) { + delete[] static_cast(model_data.model_data); + model_data.model_data = nullptr; + } + return ret; + } + + ret = LoadModel(model_data, listener, model_id); + if (ret != SUCCESS) { + GELOGE(ret, "LoadModel: Load failed. ret = %u", ret); + if (model_data.model_data != nullptr) { + delete[] static_cast(model_data.model_data); + model_data.model_data = nullptr; + } + } + + if (model_data.model_data != nullptr) { + delete[] static_cast(model_data.model_data); + model_data.model_data = nullptr; + } + + return ret; +} + +Status GraphLoader::LoadModel(const ModelData &model_data, const std::shared_ptr &listener, + uint32_t &model_id) { + GELOGI("Load model begin, model_id:%u.", model_id); + + // For GeOp, Open Device 0 here. + GE_CHK_RT_RET(rtSetDevice(0)); + auto model_manager = ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + Status ret = model_manager->LoadModelOffline(model_id, model_data, listener); + if (ret != SUCCESS) { + GE_CHK_RT(rtDeviceReset(0)); + GELOGE(ret, "LoadModel: Load failed."); + return ret; + } + ret = model_manager->Start(model_id); + if (ret != SUCCESS) { + if (model_manager->Unload(model_id) != SUCCESS) { + GELOGE(FAILED, "LoadModel: Unload failed while trying to unload after a failed start."); + } + GELOGE(ret, "LoadModel: Start failed."); + return ret; + } + GELOGI("LoadModel: Start model success, model_id:%u.", model_id); + return SUCCESS; +} + Status GraphLoader::CommandHandle(const Command &command) { try { auto model_manager = ModelManager::GetInstance(); @@ -168,16 +225,16 @@ Status GraphLoader::CommandHandle(const Command &command) { } Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, - size_t mem_size, void *weight_ptr, size_t weight_size) { + size_t memsize, void *weight_ptr, size_t weightsize) { GELOGI("Load model begin, model_id:%u.", model_id); // For ACL, Open Device from App. auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); Status ret = model_manager->LoadModelOffline( - model_id, model_data, nullptr, dev_ptr, mem_size, weight_ptr, weight_size); + model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize); if (ret != SUCCESS) { - GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model failed, model_id:%u.", model_id); - return ACL_ERROR_GE_LOAD_MODEL; + GELOGE(ret, "Load model failed, model_id:%u.", model_id); + return ret; } GELOGI("Load model success, model_id:%u.", model_id); return SUCCESS; @@ -202,8 +259,8 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da GE_CHECK_NOTNULL(model_manager); Status ret = model_manager->LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids); if (ret != SUCCESS) { - GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model with queue failed, model_id:%u.", model_id); - return ACL_ERROR_GE_LOAD_MODEL; + GELOGE(ret, "Load model with queue failed, model_id:%u.", model_id); + return ret; } GELOGI("Load model with queue success, model_id:%u.", model_id); diff --git a/ge/graph/load/graph_loader.h b/ge/graph/load/graph_loader.h index 3a13a113..974af5c1 100644 --- a/ge/graph/load/graph_loader.h +++ b/ge/graph/load/graph_loader.h @@ -44,6 +44,12 @@ class GraphLoader { static Status GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size); + static Status LoadModel(const ModelData &model_data, const std::shared_ptr &listener, + uint32_t &model_id); + + static Status LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority, + const std::shared_ptr &listener, uint32_t &model_id); + static Status CommandHandle(const Command &command); static Status GetMemoryInfo(int64_t &free); diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/new_model_manager/data_dumper.cc index 6f65e907..b331d780 100644 --- a/ge/graph/load/new_model_manager/data_dumper.cc +++ b/ge/graph/load/new_model_manager/data_dumper.cc @@ -319,9 +319,6 @@ Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vis for (auto dim : tensor_descs.at(index).GetShape().GetDims()) { output.mutable_shape()->add_dim(dim); } - for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) { - output.mutable_origin_shape()->add_dim(dim); - } int64_t output_size = 0; if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) { GELOGE(PARAM_INVALID, "Get output size filed"); @@ -479,9 +476,6 @@ Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor for (auto dim : tensor_descs.at(index).GetShape().GetDims()) { input.mutable_shape()->add_dim(dim); } - for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) { - input.mutable_origin_shape()->add_dim(dim); - } int64_t input_size = 0; if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) { GELOGI("Get aipp input size according to attr is %ld", input_size); diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index eae6de13..02aa3b61 100644 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -289,8 +289,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh if (weight_ptr == nullptr) { weights_mem_base_ = MallocWeightsMem(weights_size); if (weights_mem_base_ == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc weight memory failed. size: %zu", weights_size); - return ACL_ERROR_GE_MEMORY_ALLOCATION; + GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size); + return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED; } is_inner_weight_base_ = true; } @@ -307,8 +307,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { if (is_feature_map_mem_has_inited_) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "call InitFeatureMapMem more than once ."); - return ACL_ERROR_GE_MEMORY_ALLOCATION; + GELOGE(FAILED, "call InitFeatureMapMem more than once ."); + return FAILED; } is_feature_map_mem_has_inited_ = true; @@ -316,8 +316,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size; if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); - return ACL_ERROR_GE_MEMORY_ALLOCATION; + GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); + return FAILED; } mem_base_ = static_cast(dev_ptr); @@ -327,8 +327,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { if (TotalMemSize() && mem_base_ == nullptr) { mem_base_ = MallocFeatureMapMem(data_size); if (mem_base_ == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. size: %zu", data_size); - return ACL_ERROR_GE_MEMORY_ALLOCATION; + GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size); + return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED; } GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, mem_base_, data_size); @@ -343,8 +343,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { if (p2p_data_size != 0) { p2p_mem_base_ = MallocP2PMem(p2p_data_size); if (p2p_mem_base_ == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc p2p memory failed,size: %zu", p2p_data_size); - return ACL_ERROR_GE_MEMORY_ALLOCATION; + GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size); + return GE_EXEC_ALLOC_P2P_MEM_FAILED; } GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, p2p_mem_base_, p2p_data_size); @@ -485,6 +485,8 @@ Status DavinciModel::DoTaskSink() { GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed."); + GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed."); GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed."); @@ -710,7 +712,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size } // collect profiling for ge - GE_CHK_STATUS_RET(InitModelProfile(), "Init model profile failed"); auto &profiling_manager = ProfilingManager::Instance(); if (profiling_manager.ProfilingModelLoadOn()) { Status p_ret = ReportProfilingData(); @@ -2087,61 +2088,12 @@ Status DavinciModel::SyncVarData() { return ret; } -Status DavinciModel::InitModelProfile() { - for (const auto &task : task_list_) { - GE_CHECK_NOTNULL(task); - const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo(); - // when type is RT_MODEL_TASK_KERNEL, ctx is not null - if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) { - continue; - } - - GELOGI("task.id = %u, opNum = %zu", task->GetTaskID(), fusion_op_info->original_op_names.size()); - op_id_map_.insert(std::make_pair(fusion_op_info->op_index, task->GetTaskID())); - } - - std::set task_id_set; - using CIT = std::multimap::const_iterator; - using Range = std::pair; - for (const auto &task : task_list_) { - GE_CHECK_NOTNULL(task); - const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo(); - if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) { - continue; - } - - if (task_id_set.count(task->GetTaskID()) > 0) { - continue; - } - - const auto &op_desc = GetOpByIndex(fusion_op_info->op_index); - GE_CHK_BOOL_EXEC(op_desc != nullptr, return FAILED, "index: %u out of range", fusion_op_info->op_index); - - ProfileInfo profile; - profile.fusion_info = *fusion_op_info; - Range range = op_id_map_.equal_range(fusion_op_info->op_index); - for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) { - profile.task_count++; - task_id_set.insert(range_idx->second); - } - - // memory info - TaskMemInfo &mem_info = profile.memory_info; - const auto input_size = ModelUtils::GetInputSize(op_desc); - const auto output_size = ModelUtils::GetOutputSize(op_desc); - const auto workspace_size = ModelUtils::GetWorkspaceSize(op_desc); - const auto weight_size = ModelUtils::GetWeightSize(op_desc); - mem_info.input_size = std::accumulate(input_size.begin(), input_size.end(), 0); - mem_info.output_size = std::accumulate(output_size.begin(), output_size.end(), 0); - mem_info.workspace_size = std::accumulate(workspace_size.begin(), workspace_size.end(), 0); - mem_info.weight_size = std::accumulate(weight_size.begin(), weight_size.end(), 0); - mem_info.total_size = mem_info.weight_size + mem_info.input_size + mem_info.output_size + mem_info.workspace_size; - - profile_list_.emplace_back(profile); +inline int64_t SumSize(const vector &size_list) { + int64_t sum_size = 0; + for (const int64_t &size : size_list) { + sum_size += size; } - - GELOGI("fusion task size: %zu, profile info size: %zu", op_id_map_.size(), profile_list_.size()); - return SUCCESS; + return sum_size; } Status DavinciModel::SinkModelProfile() { @@ -2149,12 +2101,18 @@ Status DavinciModel::SinkModelProfile() { auto &prof_mgr = ProfilingManager::Instance(); ReporterData reporter_data{}; // report model data tag name - std::string tag_name("model_load_info_" + std::to_string(this->Id())); + std::string tag_name; + tag_name.append("model_load_info_").append(std::to_string(this->Id())); GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, return FAILED, "Sink model tag memcpy error."); // Model Header - std::string name = om_name_.empty() ? name_ : om_name_; + string name; + if (!om_name_.empty()) { + name = om_name_; + } else { + name = name_; + } size_t name_len = name.size(); reporter_data.deviceId = device_id_; reporter_data.data = (unsigned char *)&name_len; @@ -2186,71 +2144,128 @@ Status DavinciModel::SinkModelProfile() { GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u.", this->Id()); + int32_t task_num = task_list_.size(); + std::multimap op_id_map; + std::set task_id_set; + for (int32_t i = 0; i < task_num; i++) { + auto task = task_list_[i]; + GE_CHECK_NOTNULL(task); + auto fusion_op_info = task->GetFusionOpInfo(); + // when type is RT_MODEL_TASK_KERNEL, ctx is not null + if (fusion_op_info != nullptr) { + uint32_t op_num = fusion_op_info->original_op_names.size(); + uint32_t task_id = task->GetTaskID(); + if (op_num > 0) { + GELOGI("task.id = %u, opNum = %u", task_id, op_num); + op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id)); + } + } + } + + struct memoryInfo { + int64_t input_size; + int64_t output_size; + int64_t weight_size; + int64_t workspace_size; + int64_t total_size; + + memoryInfo() : input_size(0), output_size(0), weight_size(0), workspace_size(0), total_size(0) {} + }; + using CIT = std::multimap::const_iterator; using Range = std::pair; - for (const ProfileInfo &profile : profile_list_) { - // op name after fusion - string fusion_op_name = profile.fusion_info.op_name; - int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); - reporter_data.data = (unsigned char *)&fusion_op_name_len; - reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - reporter_data.data = (unsigned char *)fusion_op_name.c_str(); - reporter_data.dataLen = fusion_op_name_len; - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - // original op name before fusion - uint32_t op_num = profile.fusion_info.original_op_names.size(); - reporter_data.data = (unsigned char *)&op_num; - reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - for (uint32_t k = 0; k < op_num; k++) { - std::string op_name = profile.fusion_info.original_op_names[k]; - int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size(); - reporter_data.data = (unsigned char *)&op_name_len; + for (int32_t i = 0; i < task_num; i++) { + auto task = task_list_[i]; + GE_CHECK_NOTNULL(task); + auto fusion_op_info = task->GetFusionOpInfo(); + if (fusion_op_info != nullptr && fusion_op_info->original_op_names.size() > 0) { + uint32_t task_id = task->GetTaskID(); + uint32_t op_num = fusion_op_info->original_op_names.size(); + uint32_t task_count = 0; + if (task_id_set.count(task_id) != 0) { + continue; + } + + uint32_t op_id = fusion_op_info->op_index; + Range range = op_id_map.equal_range(op_id); + for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) { + task_count++; + uint32_t task_id = range_idx->second; + task_id_set.insert(task_id); + } + + // op name after fusion + string fusion_op_name = fusion_op_info->op_name; + int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); + reporter_data.data = (unsigned char *)&fusion_op_name_len; reporter_data.dataLen = sizeof(int32_t); GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u.", this->Id()); - reporter_data.data = (unsigned char *)op_name.c_str(); - reporter_data.dataLen = op_name_len; + + reporter_data.data = (unsigned char *)fusion_op_name.c_str(); + reporter_data.dataLen = fusion_op_name_len; + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + + // original op name before fusion + reporter_data.data = (unsigned char *)&op_num; + reporter_data.dataLen = sizeof(int32_t); GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u.", this->Id()); - } - // stream id info - uint32_t streamId = profile.fusion_info.stream_id; - reporter_data.data = (unsigned char *)&streamId; - reporter_data.dataLen = sizeof(int32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - // memory info - reporter_data.data = (unsigned char *)&profile.memory_info; - reporter_data.dataLen = sizeof(profile.memory_info); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - // task info - reporter_data.data = (unsigned char *)&profile.task_count; - reporter_data.dataLen = sizeof(uint32_t); - GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, - "Reporter data fail, model id:%u.", this->Id()); - - Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index); - for (CIT idx = task_range.first; idx != task_range.second; ++idx) { - uint32_t task_id = idx->second; - reporter_data.data = (unsigned char *)&task_id; + for (uint32_t k = 0; k < op_num; k++) { + std::string op_name = fusion_op_info->original_op_names[k]; + int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size(); + reporter_data.data = (unsigned char *)&op_name_len; + reporter_data.dataLen = sizeof(int32_t); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + reporter_data.data = (unsigned char *)op_name.c_str(); + reporter_data.dataLen = op_name_len; + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + } + + // stream id info + uint32_t streamId = task->GetStreamId(); + reporter_data.data = (unsigned char *)&streamId; + reporter_data.dataLen = sizeof(int32_t); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + + // memory info + struct memoryInfo memory_info; + uint32_t op_index = fusion_op_info->op_index; + auto iter = op_list_.find(op_index); + GE_CHK_BOOL_EXEC(iter != op_list_.end(), return FAILED, "index is out of range, index: %u", op_index); + auto op_desc = iter->second; + memory_info.input_size = SumSize(ModelUtils::GetInputSize(op_desc)); + memory_info.output_size = SumSize(ModelUtils::GetOutputSize(op_desc)); + memory_info.workspace_size = SumSize(ModelUtils::GetWorkspaceSize(op_desc)); + memory_info.weight_size = SumSize(ModelUtils::GetWeightSize(op_desc)); + memory_info.total_size = + memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size; + reporter_data.data = (unsigned char *)&memory_info; + reporter_data.dataLen = sizeof(struct memoryInfo); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + + // task info + reporter_data.data = (unsigned char *)&task_count; reporter_data.dataLen = sizeof(uint32_t); GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, "Reporter data fail, model id:%u.", this->Id()); + + Range task_range = op_id_map.equal_range(op_id); + for (CIT idx = task_range.first; idx != task_range.second; ++idx) { + uint32_t task_id = idx->second; + reporter_data.data = (unsigned char *)&task_id; + reporter_data.dataLen = sizeof(uint32_t); + GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, + "Reporter data fail, model id:%u.", this->Id()); + } } } - return SUCCESS; } @@ -2824,19 +2839,19 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector &inputs, const return SUCCESS; } -Status DavinciModel::UpdateKnownZeroCopyAddr(vector &total_io_addrs) { - for (size_t i = 0; i < total_io_addrs.size(); ++i) { - auto it_in = knonw_input_data_info_.find(total_io_addrs[i]); +Status DavinciModel::UpdateKnownZeroCopyAddr() { + for (size_t i = 0; i < total_io_addrs_.size(); ++i) { + auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]); if (it_in != knonw_input_data_info_.end()) { - GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i], - knonw_input_data_info_.at(total_io_addrs[i])); - total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]); + GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], + knonw_input_data_info_.at(total_io_addrs_[i])); + total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]); } - auto it_out = knonw_output_data_info_.find(total_io_addrs[i]); + auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]); if (it_out != knonw_output_data_info_.end()) { - GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i], - knonw_output_data_info_.at(total_io_addrs[i])); - total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]); + GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], + knonw_output_data_info_.at(total_io_addrs_[i])); + total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]); } } GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success."); @@ -2865,7 +2880,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector &inputs, const vec } else { total_io_addrs_ = orig_total_io_addrs_; } - GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed."); + GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed."); if (total_args_size_ == 0) { GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); @@ -2932,14 +2947,7 @@ Status DavinciModel::MallocKnownArgs() { GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } - // malloc dynamic and static hybrid memory - if (total_hybrid_args_size_ != 0) { - rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); - return RT_ERROR_TO_GE_STATUS(rt_ret); - } - } + // malloc fixed addr memory, eg: rts op if (total_fixed_addr_size_ != 0) { GELOGI("Begin to allocate fixed addr."); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index be8efd90..8120ee71 100644 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -76,20 +76,6 @@ struct timeInfo { int64_t dumpEndTime; }; -struct TaskMemInfo { - int64_t input_size{0}; - int64_t output_size{0}; - int64_t weight_size{0}; - int64_t workspace_size{0}; - int64_t total_size{0}; -}; - -struct ProfileInfo { - FusionOpInfo fusion_info; - TaskMemInfo memory_info; - uint32_t task_count{0}; -}; - enum ExecuteMode { INITIALIZATION, SYNCHRONIZATION, @@ -240,6 +226,8 @@ class DavinciModel { const vector &GetDataList() const { return data_op_list_; } // get Op + const map &GetOpList() const { return op_list_; } + OpDescPtr GetOpByIndex(uint32_t index) const { if (op_list_.find(index) == op_list_.end()) { return nullptr; @@ -448,6 +436,10 @@ class DavinciModel { int64_t GetLoadEndTime() { return load_end_time_; } + Status SinkModelProfile(); + + Status SinkTimeProfile(const InputData ¤t_data); + Status ReportProfilingData(); void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) { @@ -484,14 +476,6 @@ class DavinciModel { void SetTotalIOAddrs(vector &io_addrs) { total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end()); } - void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; } - uint32_t GetHybridArgsSize() { - return total_hybrid_args_size_; - } - void *GetCurrentHybridArgsAddr(uint32_t offset) { - void *cur_args = static_cast(hybrid_addrs_) + offset; - return cur_args; - } void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size); int64_t GetFixedAddrsSize(string tensor_name); void *GetCurrentFixedAddr(int64_t offset) const { @@ -510,7 +494,7 @@ class DavinciModel { Status MallocKnownArgs(); Status UpdateKnownNodeArgs(const vector &inputs, const vector &outputs); Status CreateKnownZeroCopyMap(const vector &inputs, const vector &outputs); - Status UpdateKnownZeroCopyAddr(vector &total_io_addrs); + Status UpdateKnownZeroCopyAddr(); void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); @@ -812,11 +796,6 @@ class DavinciModel { void SetDataDumperArgs(const ComputeGraphPtr &compute_graph); - Status InitModelProfile(); - Status SinkModelProfile(); - - Status SinkTimeProfile(const InputData ¤t_data); - Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, std::vector &outputs); @@ -952,8 +931,6 @@ class DavinciModel { void *args_ = nullptr; void *args_host_ = nullptr; void *fixed_addrs_ = nullptr; - void *hybrid_addrs_ = nullptr; - uint32_t total_hybrid_args_size_ = 0; int64_t total_fixed_addr_size_ = 0; std::map knonw_input_data_info_; std::map knonw_output_data_info_; @@ -993,9 +970,6 @@ class DavinciModel { // key: input_index: input is merge node; value: each gear info and each output shape std::map, vector>> merge_nodes_gear_and_real_out_shape_info_; std::vector> all_gears_info_; - - std::multimap op_id_map_; - std::vector profile_list_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 0dbeb38e..80c6191b 100644 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -18,6 +18,7 @@ #include +#include "aicpu/aicpu_schedule/aicpu_op_type_list.h" #include "common/dump/dump_manager.h" #include "common/l2_cache_optimize.h" #include "common/profiling/profiling_manager.h" @@ -30,6 +31,7 @@ #include "graph/load/new_model_manager/davinci_model_parser.h" #include "model/ge_root_model.h" #include "graph/common/local_context.h" +#include "graph/utils/attr_utils.h" #include "common/formats/utils/formats_trans_utils.h" #include "hybrid/hybrid_davinci_model.h" @@ -52,6 +54,7 @@ const char *const kDeleteCustOp = "deleteCustOp"; const int kTimeSpecNano = 1000000000; const int kTimeSpecMiro = 1000000; const int kSessionMaxBias = 100; +const int kOpNameMaxSize = 100; struct CustAicpuSoBuf { uint64_t kernelSoBuf; uint32_t kernelSoBufLen; @@ -89,7 +92,6 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u if (op_type == aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY) { std::vector v_aicpu_kernel; std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); - std::lock_guard lock(map_mutex_); auto iter = model_aicpu_kernel_.find(model_key); if (iter != model_aicpu_kernel_.end()) { GELOGD("kernel destroy session_id %lu, model_id %u.", session_id, model_id); @@ -177,7 +179,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u } void ModelManager::DestroyAicpuSession(uint64_t session_id) { - std::lock_guard lock(map_mutex_); + std::lock_guard lock(sess_ids_mutex_); auto it = sess_ids_.find(session_id); if (it == sess_ids_.end()) { GELOGI("The session: %lu not created.", session_id); @@ -206,7 +208,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { } ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); auto hybrid_davinci_model = hybrid_model_map_.find(model_id); if (hybrid_davinci_model != hybrid_model_map_.end()) { uint64_t session_id = hybrid_davinci_model->second->GetSessionId(); @@ -216,8 +218,8 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { auto it = model_map_.find(model_id); if (it == model_map_.end()) { - GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); - return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; + GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); + return GE_EXEC_MODEL_ID_INVALID; } uint64_t session_id = it->second->GetSessionId(); DestroyAicpuSession(session_id); @@ -226,7 +228,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) { GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id); - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id); @@ -239,7 +241,7 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_ } ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) { - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); std::vector v_aicpu_kernel; std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { @@ -251,7 +253,7 @@ ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_i } ModelManager::~ModelManager() { - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); model_map_.clear(); model_aicpu_kernel_.clear(); cust_aicpu_so_.clear(); @@ -359,18 +361,18 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr &davinci_model) { GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id); - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); model_map_[id] = davinci_model; } void ModelManager::InsertModel(uint32_t id, shared_ptr &hybrid_model) { GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id); - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); hybrid_model_map_[id] = hybrid_model; } Status ModelManager::DeleteModel(uint32_t id) { - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); auto it = model_map_.find(id); auto hybrid_model_it = hybrid_model_map_.find(id); @@ -385,22 +387,22 @@ Status ModelManager::DeleteModel(uint32_t id) { } else if (hybrid_model_it != hybrid_model_map_.end()) { (void)hybrid_model_map_.erase(hybrid_model_it); } else { - GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id); - return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; + GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id); + return GE_EXEC_MODEL_ID_INVALID; } return SUCCESS; } std::shared_ptr ModelManager::GetModel(uint32_t id) { - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); auto it = model_map_.find(id); return (it == model_map_.end()) ? nullptr : it->second; } std::shared_ptr ModelManager::GetHybridModel(uint32_t id) { - std::lock_guard lock(map_mutex_); + std::lock_guard lock(map_mutex_); auto it = hybrid_model_map_.find(id); return (it == hybrid_model_map_.end()) ? nullptr : it->second; @@ -903,7 +905,7 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); davinci_model->SetModelDescVersion(new_model_desc); @@ -971,9 +973,8 @@ Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id, } Status ModelManager::GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type) { - auto davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetCurShape Failed, Invalid Model ID %u!", model_id); + std::shared_ptr davinci_model = GetModel(model_id); + GE_CHECK_NOTNULL(davinci_model); davinci_model->GetCurShape(batch_info, dynamic_type); return SUCCESS; } @@ -986,8 +987,7 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector &dynami } std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetModelAttr Failed, Invalid Model ID %u!", model_id); + GE_CHECK_NOTNULL(davinci_model); davinci_model->GetModelAttr(dynamic_output_shape_info); return SUCCESS; } @@ -997,8 +997,9 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, std::vector &inputFormats, std::vector &outputFormats) { std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", + model_id); + return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); } @@ -1013,14 +1014,18 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetAIPPInfo failed, invalid model_id is %u.", model_id); + "GetAIPPInfo failed, invalid model_id is %u.", + model_id); + return davinci_model->GetAIPPInfo(index, aipp_info); } Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetAIPPInfo failed, invalid model_id is %u.", model_id); + "GetAIPPInfo failed, invalid model_id is %u.", + model_id); + return davinci_model->GetAippType(index, type, aipp_index); } @@ -1053,15 +1058,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model mmTimespec timespec = mmGetTickCount(); ModelHelper model_helper; - Status ret = model_helper.LoadRootModel(model); - if (model_helper.GetModelType()) { - bool is_shape_unknown = false; - GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown), - "CheckIsUnknownShape failed, model id:%u", model_id); - if (is_shape_unknown || GetContext().GetHostExecFlag()) { - return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener); - } - } + Status ret = model_helper.LoadModel(model); if (ret != SUCCESS) { GELOGE(ret, "load model failed."); return ret; @@ -1075,8 +1072,8 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed"); return ACL_ERROR_GE_MEMORY_ALLOCATION; } catch (...) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed since other exception raise"); - return ACL_ERROR_GE_MEMORY_ALLOCATION; + GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise"); + return INTERNAL_ERROR; } ret = davinci_model->Assign(ge_model); if (ret != SUCCESS) { @@ -1088,7 +1085,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model int32_t device_id = 0; rtError_t rt_ret = rtGetDevice(&device_id); if (rt_ret != RT_ERROR_NONE || device_id < 0) { - GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); + GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); return RT_ERROR_TO_GE_STATUS(rt_ret); } davinci_model->SetDeviceId(device_id); @@ -1220,7 +1217,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "Invalid model id %u, check whether model has been loaded or not.", model_id); + "Invalid model id %u, check weather model has been loaded or not.", model_id); if (davinci_model->NeedDestroyAicpuKernel()) { GELOGI("Start to destroy specified aicpu kernel."); @@ -1243,7 +1240,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy } Status ModelManager::CreateAicpuSession(uint64_t session_id) { - std::lock_guard lock(map_mutex_); + std::lock_guard lock(sess_ids_mutex_); auto it = sess_ids_.find(session_id); // never been created by any model if (it == sess_ids_.end()) { @@ -1462,7 +1459,8 @@ void ModelManager::GenModelId(uint32_t *id) { if (id == nullptr) { return; } - std::lock_guard lock(map_mutex_); + + std::lock_guard lock(map_mutex_); *id = ++max_model_id_; } @@ -1534,4 +1532,200 @@ Status ModelManager::EnableExceptionDump(const std::map &options return SUCCESS; } +Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_optype_list, + std::vector &aicpu_tf_optype_list) { + std::string kernel_name = "checkOpType"; + GELOGI("LaunchKernelCheckAicpuOpType in, kernel name %s", kernel_name.c_str()); + std::lock_guard lock(cust_aicpu_mutex_); + std::vector req_aicpu_op_info_list; + std::vector res_aicpu_op_info_list; + std::vector res_ret_code_list; + + if (aicpu_optype_list.empty() && aicpu_tf_optype_list.empty()) { + GELOGI("No need to check aicpu op type."); + return SUCCESS; + } + + vector allocated_mem; + rtError_t status; + rtStream_t stream = nullptr; + void *args = nullptr; + + void *d_req_op_list = nullptr; + void *d_res_op_list = nullptr; + void *d_ret_code_list = nullptr; + + size_t aicpu_op_nums = aicpu_optype_list.size(); + size_t tf_op_nums = aicpu_tf_optype_list.size(); + size_t op_nums = aicpu_op_nums + tf_op_nums; + // malloc sysOpInfoList in SysOpCheckInfo + status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + allocated_mem.push_back(d_req_op_list); + + // malloc sysOpInfoList in SysOpCheckResp + status = rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + allocated_mem.push_back(d_res_op_list); + + // malloc returnCodeList in SysOpCheckResp + status = rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + allocated_mem.push_back(d_ret_code_list); + + for (const auto &op_type : aicpu_optype_list) { + SysOpInfo op_info; + // malloc op_type name in SysOpInfo + void *d_op_type_name = nullptr; + status = rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + allocated_mem.push_back(d_op_type_name); + GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.length(), op_type.c_str(), op_type.length(), RT_MEMCPY_HOST_TO_DEVICE)); + op_info.opType = static_cast(reinterpret_cast(d_op_type_name)); + op_info.opLen = op_type.length(); + op_info.kernelsType = CPU_KERNEL; + req_aicpu_op_info_list.emplace_back(op_info); + } + + for (const auto &op_type : aicpu_tf_optype_list) { + SysOpInfo op_info; + // malloc op_type name in SysOpInfo + void *d_op_type_name = nullptr; + status = rtMalloc(&d_op_type_name, op_type.size(), RT_MEMORY_HBM); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + allocated_mem.push_back(d_op_type_name); + GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.size(), op_type.c_str(), op_type.size(), RT_MEMCPY_HOST_TO_DEVICE)); + op_info.opType = static_cast(reinterpret_cast(d_op_type_name)); + op_info.opLen = op_type.size(); + op_info.kernelsType = TF_KERNEL; + req_aicpu_op_info_list.emplace_back(op_info); + } + GELOGI("Check aicpu op all attr size: %zu, real attr size: %zu.", op_nums, req_aicpu_op_info_list.size()); + GE_CHK_RT(rtMemcpy(d_req_op_list, sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), req_aicpu_op_info_list.data(), + sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), RT_MEMCPY_HOST_TO_DEVICE)); + + SysOpCheckInfo op_check_info_req = { 0 }; + SysOpCheckResp op_check_info_res = { 0 }; + op_check_info_req.opListNum = op_nums; + op_check_info_req.offSetLen = sizeof(SysOpCheckInfo); + op_check_info_req.sysOpInfoList = static_cast(reinterpret_cast(d_req_op_list)); + + op_check_info_res.opListNum = 0; + op_check_info_res.isWithoutJson = 0; + op_check_info_res.returnCodeList = static_cast(reinterpret_cast(d_ret_code_list)); + op_check_info_res.sysOpInfoList = static_cast(reinterpret_cast(d_res_op_list)); + + uint32_t args_size = sizeof(SysOpCheckInfo) + sizeof(SysOpCheckResp); + status = rtMalloc(&args, args_size, RT_MEMORY_HBM); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + allocated_mem.push_back(args); + GE_CHK_RT( + rtMemcpy(args, sizeof(SysOpCheckInfo), reinterpret_cast(&op_check_info_req), sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy(reinterpret_cast(static_cast(static_cast(reinterpret_cast(args)) + op_check_info_req.offSetLen)), + sizeof(SysOpCheckResp), reinterpret_cast(&op_check_info_res), sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtStreamCreate(&stream, 0)); + GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream)); + + status = rtStreamSynchronize(stream); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + + // Check the response + SysOpCheckResp *d_op_check_info_res = reinterpret_cast(reinterpret_cast(static_cast(static_cast(reinterpret_cast(args)) + op_check_info_req.offSetLen))); + (void)memset_s(&op_check_info_res, sizeof(SysOpCheckResp), 0, sizeof(SysOpCheckResp)); + GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp), + RT_MEMCPY_DEVICE_TO_HOST)); + std::function callback = [&]() { + for (auto mem : allocated_mem) { + GE_CHK_RT(rtFree(mem)); + } + GE_CHK_RT(rtStreamDestroy(stream)); + }; + + if (op_check_info_res.isWithoutJson) { + GELOGI("No need to check aicpu in this scenoria."); + GE_MAKE_GUARD(release, callback); + return SUCCESS; + } + uint64_t res_op_nums = op_check_info_res.opListNum; + GELOGI("Check aicpu type, is without json: %d, res op num: %lu.", op_check_info_res.isWithoutJson, res_op_nums); + if (res_op_nums != 0) { + res_ret_code_list.clear(); + res_ret_code_list.resize(res_op_nums); + res_aicpu_op_info_list.clear(); + res_aicpu_op_info_list.resize(res_op_nums); + GE_CHK_RT(rtMemcpy(res_ret_code_list.data(), sizeof(ReturnCode) * res_op_nums, + reinterpret_cast(static_cast(op_check_info_res.returnCodeList)), + sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(), sizeof(SysOpInfo) * res_op_nums, + reinterpret_cast(static_cast(op_check_info_res.sysOpInfoList)), + sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); + if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) { + GELOGE(FAILED, "Number of retcode is not equal to number of op type."); + GE_MAKE_GUARD(release, callback); + return FAILED; + } + std::string fail_reason; + for (uint32_t i = 0; i < res_op_nums; i++) { + ReturnCode ret_code = res_ret_code_list.at(i); + SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i); + GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, + aicpu_info.kernelsType, aicpu_info.opLen, ret_code); + std::vector op_name; + op_name.clear(); + op_name.resize(kOpNameMaxSize); + GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast(aicpu_info.opType), + aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST)); + std::string kernel_type = + (static_cast(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL"; + string op_name_str(op_name.data()); + fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + + " ret code:" + std::to_string(static_cast(ret_code)) + + "<0: op_type, 1: format, 2: datatype> \n"; + } + fail_reason += "not support."; + GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str()); + GE_MAKE_GUARD(release, callback); + return FAILED; + } + + GE_MAKE_GUARD(release, callback); + GELOGI("Cpu kernel launch check optype task success."); + return SUCCESS; +} + +Status ModelManager::CheckAicpuOpList(GeModelPtr ge_model) { + std::vector aicpu_optype_list; + std::vector aicpu_tf_optype_list; + bool aicpu_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list); + bool tf_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list); + if (!aicpu_need_check && !tf_need_check) { + GELOGI("Graph:%s No need to check aicpu optype.", ge_model->GetGraph().GetName().c_str()); + return SUCCESS; + } + GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), + "Launch check aicpu op type failed."); + return SUCCESS; +} + } // namespace ge diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index 99af8415..dc685519 100644 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -295,6 +295,11 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { ge::Status LaunchKernelCustAicpuSo(const string &kernel_name); + ge::Status LaunchKernelCheckAicpuOp(std::vector &aicpu_optype_list, + std::vector &aicpu_tf_optype_list); + + ge::Status CheckAicpuOpList(GeModelPtr ge_model); + ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); ge::Status GenSessionId(uint64_t &session_id); @@ -353,7 +358,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { std::map> hybrid_model_map_; std::map> model_aicpu_kernel_; uint32_t max_model_id_; - std::recursive_mutex map_mutex_; + std::mutex map_mutex_; + std::mutex sess_ids_mutex_; std::mutex session_id_create_mutex_; static::std::mutex exeception_infos_mutex_; uint64_t session_id_bias_; diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index ce31ef30..74faeb24 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -90,18 +90,20 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci fusion_op_info_.op_index = context.op_index(); fusion_op_info_.original_op_names = original_op_names; fusion_op_info_.op_name = op_desc_->GetName()); + string session_graph_model_id; + davinci_model_->GetUniqueId(op_desc_, session_graph_model_id); + // get bin_file_key + const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id); // new aicpu kernel(rtCpuKernelLaunch) no need to check function if (kernel_type_ == ccKernelType::CCE_AI_CORE) { - rtError_t rt_ret = rtGetFunctionByName(const_cast(kernel_def.stub_func().c_str()), &stub_func_); + rtError_t rt_ret; + rt_ret = rtGetFunctionByName(const_cast(kernel_def.stub_func().c_str()), &stub_func_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s", kernel_def.stub_func().c_str()); return RT_ERROR_TO_GE_STATUS(rt_ret);); } else if (kernel_type_ == ccKernelType::TE) { - // get bin_file_key - string session_graph_model_id; - davinci_model_->GetUniqueId(op_desc_, session_graph_model_id); - const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id); - rtError_t rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); + rtError_t rt_ret; + rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key); return RT_ERROR_TO_GE_STATUS(rt_ret);); @@ -370,11 +372,7 @@ Status KernelTaskInfo::SuperKernelDistribute() { Status KernelTaskInfo::Distribute() { GELOGD("KernelTaskInfo Distribute Start."); if (davinci_model_->IsKnownNode()) { - if (kernel_type_ == ccKernelType::TE) { - args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); - } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { - args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_); - } + args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_); } rtError_t rt_ret = RT_ERROR_NONE; @@ -430,31 +428,36 @@ Status KernelTaskInfo::UpdateArgs() { const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); vector input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_); vector output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_); + vector workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); vector io_addrs; - io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); - io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); - if (kernel_type_ == ccKernelType::TE) { - vector workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); + if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) { + io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); + io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); - davinci_model_->SetTotalIOAddrs(io_addrs); - } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { - davinci_model_->UpdateKnownZeroCopyAddr(io_addrs); - uintptr_t io_addr = reinterpret_cast(args_addr.get()) + sizeof(aicpu::AicpuParamHead); - auto addrs_size = sizeof(uint64_t) * io_addrs.size(); - errno_t sec_ret = memcpy_s(reinterpret_cast(io_addr), addrs_size, io_addrs.data(), addrs_size); - if (sec_ret != EOK) { - GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); - return FAILED; - } - // copy args to device - rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); - return RT_ERROR_TO_GE_STATUS(rt_ret); + } else { + string peer_input_name; + if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) { + uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name); + if (output_index > output_data_addrs.size()) { + GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.", + output_data_addrs.size(), output_index); + return FAILED; + } + io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); + for (size_t i = 0; i < output_data_addrs.size(); ++i) { + if (i == output_index) { + void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_); + io_addrs.emplace_back(fixed_addr); + continue; + } + io_addrs.emplace_back(output_data_addrs[i]); + } + io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); } } + davinci_model_->SetTotalIOAddrs(io_addrs); GELOGI("KernelTaskInfo::UpdateArgs success."); return SUCCESS; } @@ -530,18 +533,33 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { } Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { - const domi::KernelDef &kernel_def = task_def.kernel(); + domi::KernelDef kernel_def = task_def.kernel(); + uint32_t args_size = kernel_def.args_size(); + args_offset_ = davinci_model->GetTotalArgsSize(); + davinci_model->SetTotalArgsSize(args_size); + GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); + + // get opcontext stored in model const domi::KernelContext &context = kernel_def.context(); - kernel_type_ = static_cast(context.kernel_type()); - if (kernel_type_ == ccKernelType::TE) { - uint32_t args_size = kernel_def.args_size(); - args_offset_ = davinci_model->GetTotalArgsSize(); - davinci_model->SetTotalArgsSize(args_size); - GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); - } else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { - hybrid_args_offset_ = davinci_model->GetHybridArgsSize(); - davinci_model->SetHybridArgsSize(kernel_def.args_size()); - GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_); + // get opdesc + op_desc_ = davinci_model->GetOpByIndex(context.op_index()); + GE_CHECK_NOTNULL(op_desc_); + // alloc fixed addr + string peer_input_name; + if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) { + uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name); + if (output_index > op_desc_->GetOutputsSize()) { + GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(), + output_index); + return FAILED; + } + fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name); + auto tensor_desc = op_desc_->GetOutputDesc(output_index); + int64_t tensor_size = 0; + GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); + davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size); + GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size, + fixed_addr_offset_); } return SUCCESS; } @@ -870,7 +888,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k } // copy args to new host memory - args_addr = std::unique_ptr(new (std::nothrow) uint8_t[args_size_]); + std::unique_ptr args_addr(new (std::nothrow) uint8_t[args_size_]); GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_) errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); if (sec_ret != EOK) { @@ -878,23 +896,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k return FAILED; } - auto aicpu_param_head = reinterpret_cast(args_addr.get()); - const auto &ext_info = kernel_def.kernel_ext_info(); - auto init_ret = InitAicpuTaskExtInfo(ext_info); - if (init_ret != SUCCESS) { - GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); - return init_ret; - } - GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), - op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); - - aicpu_param_head->extInfoAddr = reinterpret_cast(aicpu_ext_info_addr_); - aicpu_param_head->extInfoLength = static_cast(ext_info.size()); - - if (davinci_model_->IsKnownNode()) { - return SUCCESS; - } const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); + vector input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); vector output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc); vector io_addrs; @@ -911,6 +914,19 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k } } + auto aicpu_param_head = reinterpret_cast(args_addr.get()); + const auto &ext_info = kernel_def.kernel_ext_info(); + auto init_ret = InitAicpuTaskExtInfo(ext_info); + if (init_ret != SUCCESS) { + GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); + return init_ret; + } + GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), + op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); + + aicpu_param_head->extInfoAddr = reinterpret_cast(aicpu_ext_info_addr_); + aicpu_param_head->extInfoLength = static_cast(ext_info.size()); + // malloc device memory for args rtError_t rt_ret = rtMalloc(static_cast(&args_), args_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h index 7717edd3..1f90ede1 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h @@ -159,9 +159,7 @@ class KernelTaskInfo : public TaskInfo { OpDescPtr op_desc_; DavinciModel *davinci_model_; uint32_t args_offset_ = 0; - uint32_t hybrid_args_offset_ = 0; int64_t fixed_addr_offset_ = 0; - std::unique_ptr args_addr = nullptr; bool call_save_dump_ = false; // aicpu ext_info device mem diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 7564c57b..3beea57f 100644 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -23,15 +23,25 @@ #include #include #include +#include +#include "common/ge/ge_util.h" #include "common/math/math_util.h" #include "common/thread_pool.h" +#include "common/util.h" +#include "external/graph/types.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/ge_inner_error_codes.h" +#include "framework/common/ge_types.h" #include "analyzer/analyzer.h" #include "graph/common/ge_call_wrapper.h" #include "graph/common/local_context.h" #include "graph/common/transop_util.h" +#include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" #include "graph/ge_global_options.h" +#include "graph/ge_local_context.h" +#include "graph/manager/graph_mem_allocator.h" #include "graph/manager/util/rt_context_util.h" #include "graph/partition/dynamic_shape_partition.h" #include "graph/passes/enter_pass.h" @@ -51,6 +61,8 @@ #include "graph/passes/dimension_adjust_pass.h" #include "graph/passes/dimension_compute_pass.h" #include "graph/passes/flow_ctrl_pass.h" +#include "graph/passes/hccl_group_pass.h" +#include "graph/passes/hccl_memcpy_pass.h" #include "graph/passes/identity_pass.h" #include "graph/passes/input_output_connection_identify_pass.h" #include "graph/passes/iterator_op_pass.h" @@ -65,6 +77,8 @@ #include "graph/passes/permute_pass.h" #include "graph/passes/prune_pass.h" #include "graph/passes/ref_identity_delete_op_pass.h" +#include "graph/passes/replace_with_empty_const_pass.h" +#include "graph/passes/remove_same_const_pass.h" #include "graph/passes/reshape_recovery_pass.h" #include "graph/passes/reshape_remove_pass.h" #include "graph/passes/same_transdata_breadth_fusion_pass.h" @@ -74,11 +88,14 @@ #include "graph/passes/switch_logic_remove_pass.h" #include "graph/passes/switch_to_stream_switch_pass.h" #include "graph/passes/transop_breadth_fusion_pass.h" +#include "graph/passes/transop_depth_fusion_pass.h" #include "graph/passes/transop_nearby_allreduce_fusion_pass.h" #include "graph/passes/transop_symmetry_elimination_pass.h" #include "graph/passes/transop_without_reshape_fusion_pass.h" #include "graph/passes/transpose_transdata_pass.h" +#include "graph/passes/useless_control_out_remove_pass.h" #include "graph/passes/variable_op_pass.h" +#include "graph/passes/variable_prepare_op_pass.h" #include "graph/passes/variable_ref_delete_op_pass.h" #include "graph/passes/variable_ref_useless_control_out_delete_pass.h" #include "graph/passes/end_of_sequence_add_control_pass.h" @@ -89,6 +106,9 @@ #include "graph/passes/memcpy_addr_async_pass.h" #include "graph/build/label_allocator.h" #include "graph/utils/tensor_adapter.h" +#include "graph/utils/type_utils.h" +#include "graph/graph_util.h" +#include "graph/types.h" #include "inc/pass_manager.h" #include "init/gelib.h" #include "ir_build/atc_ir_common.h" @@ -532,7 +552,8 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); } std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, - compute_graph->GetGraphID(), subgraph, compute_graph->GetName(), session_id, + compute_graph->GetGraphID(), subgraph, + compute_graph->GetName(), session_id, GetThreadLocalContext()); if (!f.valid()) { GELOGE(FAILED, "Future is invalid"); @@ -547,7 +568,8 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); } std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, - compute_graph->GetGraphID(), subgraph, compute_graph->GetName(), session_id, + compute_graph->GetGraphID(), subgraph, + compute_graph->GetName(), session_id, GetThreadLocalContext()); if (!f.valid()) { GELOGE(FAILED, "Future is invalid"); @@ -2130,6 +2152,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { TransposeTransDataPass transpose_transdata_pass; TransOpSymmetryEliminationPass symmetry_elimination_pass; DimensionComputePass dimension_compute_pass; + UselessControlOutRemovePass useless_control_out_remove_pass; names_to_passes.emplace_back("EnterPass", &enter_pass); names_to_passes.emplace_back("AddNPass", &addn_pass); names_to_passes.emplace_back("SwitchDeadBranchElimination", &switch_dead_branch_elimination); @@ -2143,6 +2166,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass); names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); names_to_passes.emplace_back("DimensionAdjustPass", &dimension_adjust_pass); + names_to_passes.emplace_back("UselessControlOutRemovePass", &useless_control_out_remove_pass); GE_TIMESTAMP_START(names_to_passes); ret = GEPass(compute_graph).Run(names_to_passes); GE_TIMESTAMP_END(names_to_passes, "GraphManager::OptimizeStage1_2"); @@ -2183,6 +2207,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::VariableRefUselessControlOutDeletePass", new (std::nothrow) VariableRefUselessControlOutDeletePass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ReshapeRecoveryPass", new (std::nothrow) ReshapeRecoveryPass)) + GE_CHK_STATUS_RET( + graph_pass.AddPass("OptimizeStage1_3::RemoveSameConstPass", new (std::nothrow) RemoveSameConstPass)) if (options_.train_graph_flag) { // Priority: The GlobalStepInsertPass should work before graph partitioner. // Reason: Make sure that the var "global_step" can be partitioned to known sub graph and allocated memory diff --git a/ge/graph/manager/graph_mem_allocator.cc b/ge/graph/manager/graph_mem_allocator.cc index 7ee7df20..b832986b 100644 --- a/ge/graph/manager/graph_mem_allocator.cc +++ b/ge/graph/manager/graph_mem_allocator.cc @@ -16,7 +16,10 @@ #include "graph/manager/graph_mem_allocator.h" +#include #include + +#include "framework/common/debug/ge_log.h" #include "graph/manager/graph_caching_allocator.h" #include "graph/manager/rdma_pool_allocator.h" diff --git a/ge/graph/optimize/graph_optimize.cc b/ge/graph/optimize/graph_optimize.cc index 1548a065..c94408de 100644 --- a/ge/graph/optimize/graph_optimize.cc +++ b/ge/graph/optimize/graph_optimize.cc @@ -76,8 +76,7 @@ void AddNodeInputProperty(ComputeGraphPtr &compute_graph) { } } -Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, - const std::string &engine_name) { +Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std::string &engine_name) { if (compute_graph == nullptr) { GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeSubGraph]: compute_graph is nullptr."); return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; diff --git a/ge/graph/passes/attach_stream_label_pass.cc b/ge/graph/passes/attach_stream_label_pass.cc index c0e0f669..cd3509c7 100644 --- a/ge/graph/passes/attach_stream_label_pass.cc +++ b/ge/graph/passes/attach_stream_label_pass.cc @@ -18,6 +18,8 @@ #include "ge/ge_api_types.h" #include "graph/common/omg_util.h" +using std::string; + namespace ge { Status AttachStreamLabelPass::Run(ComputeGraphPtr graph) { GELOGD("AttachStreamLabelPass Enter."); @@ -187,21 +189,10 @@ Status AttachStreamLabelPass::UpdateEnterNode() { } std::stack enter_nodes; - std::string batch_label; for (const auto &enter_node : pair.second) { enter_nodes.emplace(enter_node); - std::string tmp_label; - (void)AttrUtils::GetStr(enter_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, tmp_label); - if (!tmp_label.empty()) { - if (batch_label.empty()) { - batch_label = tmp_label; - } else if (batch_label != tmp_label) { - GELOGE(FAILED, "multi batch_label exist, label1=%s, label2=%s.", batch_label.c_str(), tmp_label.c_str()); - return FAILED; - } - } } - if (UpdateLoopBranch(enter_nodes, active_label_list[0], batch_label) != SUCCESS) { + if (UpdateLoopBranch(enter_nodes, active_label_list[0]) != SUCCESS) { GELOGE(FAILED, "Update stream_label for loop_branch failed."); return FAILED; } @@ -226,10 +217,7 @@ Status AttachStreamLabelPass::SetEnterLabel(const std::vector &enter_no } for (const auto &enter_node : enter_nodes) { - GE_CHECK_NOTNULL(enter_node->GetOpDesc()); - if (enter_node->GetOpDesc()->HasAttr(ATTR_NAME_STREAM_LABEL)) { - GE_CHK_STATUS_RET(SetStreamLabel(enter_node, stream_label), "Set stream label failed."); - } + GE_CHK_STATUS_RET(SetStreamLabel(enter_node, stream_label), "Set stream label failed."); } return SUCCESS; } @@ -241,8 +229,7 @@ Status AttachStreamLabelPass::SetEnterLabel(const std::vector &enter_no /// @param [in] batch_label /// @return Status /// -Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack &enter_nodes, const std::string &stream_label, - const std::string &batch_label) { +Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack &enter_nodes, const string &stream_label) { std::stack nodes(enter_nodes); NodePtr cur_node = nullptr; while (!nodes.empty()) { @@ -251,11 +238,6 @@ Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack &enter_ for (const NodePtr &out_node : cur_node->GetOutAllNodes()) { OpDescPtr out_desc = out_node->GetOpDesc(); GE_CHECK_NOTNULL(out_desc); - std::string tmp_label; - (void)AttrUtils::GetStr(out_desc, ATTR_NAME_BATCH_LABEL, tmp_label); - if (!tmp_label.empty() && (tmp_label != batch_label)) { - continue; - } std::string out_type = out_desc->GetType(); bool need_skip = out_desc->HasAttr(ATTR_NAME_STREAM_LABEL) || (out_type == ENTER) || (out_type == REFENTER) || diff --git a/ge/graph/passes/attach_stream_label_pass.h b/ge/graph/passes/attach_stream_label_pass.h index 19f11480..ad71d58f 100644 --- a/ge/graph/passes/attach_stream_label_pass.h +++ b/ge/graph/passes/attach_stream_label_pass.h @@ -58,11 +58,9 @@ class AttachStreamLabelPass : public GraphPass { /// @brief Update stream_label for loop_branch /// @param [in] enter_nodes /// @param [in] stream_label - /// @param [in] batch_label /// @return Status /// - static Status UpdateLoopBranch(const std::stack &enter_nodes, const std::string &stream_label, - const std::string &batch_label); + static Status UpdateLoopBranch(const std::stack &enter_nodes, const std::string &stream_label); /// /// @brief Update stream_label start with enter nodes diff --git a/ge/graph/passes/base_pass.cc b/ge/graph/passes/base_pass.cc index 68efbeb9..3b854c18 100644 --- a/ge/graph/passes/base_pass.cc +++ b/ge/graph/passes/base_pass.cc @@ -96,7 +96,7 @@ Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unorder node->GetName().c_str(), node->GetType().c_str()); continue; } - if (node_to_re_pass->IsAllInNodesSeen(nodes_seen)) { + if (nodes_seen.count(node_to_re_pass.get()) > 0 || node_to_re_pass->IsAllInNodesSeen(nodes_seen)) { GELOGD("The node %s will be re-pass later", node_to_re_pass->GetName().c_str()); nodes_re_pass.insert(node_to_re_pass); } else { diff --git a/ge/graph/passes/dimension_adjust_pass.cc b/ge/graph/passes/dimension_adjust_pass.cc index fc5fe69f..5701faf5 100644 --- a/ge/graph/passes/dimension_adjust_pass.cc +++ b/ge/graph/passes/dimension_adjust_pass.cc @@ -80,7 +80,71 @@ Status DimensionAdjustPass::Run(ge::NodePtr &node) { } } + ret = DealWithInNodes(node); + if (ret != SUCCESS) { + GELOGE(ret, "DealWithInNodes of %s failed.", node->GetName().c_str()); + return ret; + } + std::vector data_relink_io_map = {kDataInputIndex}; return IsolateAndDeleteNode(node, data_relink_io_map); } + +Status DimensionAdjustPass::DealWithInNodes(NodePtr &node) { + GE_CHECK_NOTNULL(node); + GE_CHECK_NOTNULL(node->GetOpDesc()); + auto graph = node->GetOwnerComputeGraph(); + auto in_data_anchors = node->GetAllInDataAnchors(); + for (auto &in_data_anchor : in_data_anchors) { + if (in_data_anchor == nullptr) { + continue; + } + auto in_node_anchor = in_data_anchor->GetPeerOutAnchor(); + if (in_node_anchor == nullptr) { + continue; + } + auto in_node = in_node_anchor->GetOwnerNode(); + if (in_node->GetType() == SWITCHN) { + auto identity_name = node->GetName() + "_ctrl_identity_" + std::to_string(in_data_anchor->GetIdx()); + auto identity = + AddIdentityNodeToGraph(identity_name, node->GetOpDesc()->GetInputDesc(in_data_anchor->GetIdx()), graph); + GE_CHECK_NOTNULL(identity); + GELOGI("Create new identity node[%s] after node %s[type: %s] success.", identity->GetName().c_str(), + in_node->GetName().c_str(), in_node->GetType().c_str()); + GE_CHK_STATUS_RET(GraphUtils::AddEdge(in_node_anchor, identity->GetInDataAnchor(0))) + GE_CHECK_NOTNULL(identity->GetOutControlAnchor()); + if (identity->GetOutControlAnchor()->IsLinkedWith(node->GetInControlAnchor())) { + continue; + } + GE_CHK_STATUS_RET(GraphUtils::AddEdge(identity->GetOutControlAnchor(), node->GetInControlAnchor())) + } + } + + return SUCCESS; +} + +NodePtr DimensionAdjustPass::AddIdentityNodeToGraph(const string &name, const GeTensorDesc &tensor, + ComputeGraphPtr &graph) { + if (graph == nullptr) { + GELOGE(INTERNAL_ERROR, "Comput graph ptr is null in creating identity node."); + return nullptr; + } + + OpDescPtr desc = MakeShared("", ""); + if (desc == nullptr) { + GELOGE(MEMALLOC_FAILED, "Failed to create op desc."); + return nullptr; + } + + desc->SetName(name); + desc->SetType(IDENTITY); + auto ret = desc->AddInputDesc(tensor); + auto ret2 = desc->AddOutputDesc(tensor); + if ((ret != GRAPH_SUCCESS) || (ret2 != GRAPH_SUCCESS)) { + GELOGE(INTERNAL_ERROR, "Failed to add input/output desc in creating identity."); + return nullptr; + } + + return graph->AddNodeFront(desc); +} } // namespace ge diff --git a/ge/graph/passes/dimension_adjust_pass.h b/ge/graph/passes/dimension_adjust_pass.h index 685d9694..7766f140 100644 --- a/ge/graph/passes/dimension_adjust_pass.h +++ b/ge/graph/passes/dimension_adjust_pass.h @@ -34,6 +34,10 @@ namespace ge { class DimensionAdjustPass : public BaseNodePass { public: Status Run(ge::NodePtr &node) override; + + private: + Status DealWithInNodes(ge::NodePtr &node); + NodePtr AddIdentityNodeToGraph(const std::string &name, const GeTensorDesc &tensor, ComputeGraphPtr &graph); }; } // namespace ge diff --git a/ge/graph/passes/dynamic_single_op_reset_shape_pass.cc b/ge/graph/passes/dynamic_single_op_reset_shape_pass.cc index d50b6df9..6fa63642 100644 --- a/ge/graph/passes/dynamic_single_op_reset_shape_pass.cc +++ b/ge/graph/passes/dynamic_single_op_reset_shape_pass.cc @@ -113,16 +113,13 @@ Status DynamicSingleOpResetShapePass::ResetOpShape(OpDescPtr &op_desc) { GE_CHECK_NOTNULL(op_desc); std::vector dynamic_shape_dims = {kDynamicShapeDim}; GeShape dynamic_shape(dynamic_shape_dims); - bool reset_shape_flag = false; - if (ResetInputTensorShape(op_desc, dynamic_shape, reset_shape_flag) == SUCCESS && reset_shape_flag) { - (void)ResetOutputTensorShape(op_desc, dynamic_shape); - } + (void)ResetInputTensorShape(op_desc, dynamic_shape); + (void)ResetOutputTensorShape(op_desc, dynamic_shape); return SUCCESS; } -Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape, - bool &reset_shape_flag) { - reset_shape_flag = false; +Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc, + const GeShape &dynamic_shape) { GE_CHECK_NOTNULL(op_desc); for (size_t i = 0; i < op_desc->GetAllInputsDesc().size(); i++) { auto input_desc = op_desc->MutableInputDesc(static_cast(i)); @@ -136,7 +133,6 @@ Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc, if (CheckIfConstInput(input_desc)) { continue; } - reset_shape_flag = true; input_desc->SetShape(dynamic_shape); } return SUCCESS; diff --git a/ge/graph/passes/dynamic_single_op_reset_shape_pass.h b/ge/graph/passes/dynamic_single_op_reset_shape_pass.h index 897fcac6..765448ff 100644 --- a/ge/graph/passes/dynamic_single_op_reset_shape_pass.h +++ b/ge/graph/passes/dynamic_single_op_reset_shape_pass.h @@ -27,7 +27,7 @@ class DynamicSingleOpResetShapePass : public GraphPass { private: Status ResetOpShape(OpDescPtr &op_desc); - Status ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape, bool &reset_shape_flag); + Status ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape); Status ResetOutputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape); Status CheckAllAicpuNodes(const ComputeGraphPtr &graph, bool &is_not_aicpu); bool CheckIfConstInput(const GeTensorDescPtr &input_tensor_desc); diff --git a/ge/graph/passes/enter_pass.cc b/ge/graph/passes/enter_pass.cc index afeca78f..066c97cf 100644 --- a/ge/graph/passes/enter_pass.cc +++ b/ge/graph/passes/enter_pass.cc @@ -23,6 +23,7 @@ namespace { const size_t kOutNodesNum = 1; +const size_t kInCtrlNodesNum = 1; } namespace ge { @@ -55,6 +56,7 @@ Status EnterPass::Run(NodePtr &node) { if (out_ctrl_node == nullptr) { continue; } + GELOGI("Remove control edge from %s to %s.", node->GetName().c_str(), out_ctrl_node->GetName().c_str()); if (GraphUtils::RemoveEdge(node->GetOutControlAnchor(), out_ctrl_node->GetInControlAnchor()) != GRAPH_SUCCESS) { GELOGE(FAILED, "Remove Enter ctrl output fail, %s->%s", node->GetName().c_str(), out_ctrl_node->GetName().c_str()); @@ -62,8 +64,12 @@ Status EnterPass::Run(NodePtr &node) { } } } else { - if (OptimizeEnter(node, in_node) != SUCCESS) { - GELOGE(FAILED, "Optimize enter node[%s] failed.", node->GetName().c_str()); + if (OptimizeEnterWithOnlyDataOut(node, in_node) != SUCCESS) { + GELOGE(FAILED, "Optimize enter node[%s] with only out data node failed.", node->GetName().c_str()); + return FAILED; + } + if (UnlinkCtrlEdgeBeforeConst(node) != SUCCESS) { + GELOGE(FAILED, "Unlink control edge before const of node[%s]'s out nodes failed.", node->GetName().c_str()); return FAILED; } } @@ -72,7 +78,7 @@ Status EnterPass::Run(NodePtr &node) { return SUCCESS; } -Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) { +Status EnterPass::OptimizeEnterWithOnlyDataOut(NodePtr &node, NodePtr &in_node) { if ((in_node->GetOutAllNodes().size() != kOutNodesNum) || !node->GetOutControlNodes().empty()) { return SUCCESS; } @@ -83,17 +89,61 @@ Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) { } GE_CHECK_NOTNULL(in_node->GetOutDataAnchor(0)); - GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0))); + GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0))) const auto &out_data_anchor = node->GetOutDataAnchor(0); GE_CHECK_NOTNULL(out_data_anchor); for (const auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { - GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor)); - GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor)); + GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor)) + GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor)) } - GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(node->GetOwnerComputeGraph(), node)); + GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(node->GetOwnerComputeGraph(), node)) AddNodeDeleted(node); AddRePassNodesWithInOut(in_node); return SUCCESS; } + +Status EnterPass::UnlinkCtrlEdgeBeforeConst(NodePtr &node) { + auto out_ctrl_nodes = node->GetOutControlNodes(); + if (out_ctrl_nodes.empty()) { + return SUCCESS; + } + auto out_ctrl_anchor = node->GetOutControlAnchor(); + GE_CHECK_NOTNULL(out_ctrl_anchor); + + for (auto &out_ctrl_node : out_ctrl_nodes) { + GE_CHECK_NOTNULL(out_ctrl_node); + if ((out_ctrl_node->GetType() != CONSTANT) && (out_ctrl_node->GetType() != CONSTANTOP)) { + continue; + } + auto in_ctrl_nodes = out_ctrl_node->GetInControlNodes(); + if (in_ctrl_nodes.size() != kInCtrlNodesNum) { + continue; + } + + // Skip when has merge out + bool has_merge_out = false; + auto out_nodes_of_const = out_ctrl_node->GetOutAllNodes(); + for (const auto &out_node_of_const : out_nodes_of_const) { + GE_CHECK_NOTNULL(out_node_of_const); + if (out_node_of_const->GetType() == MERGE || out_node_of_const->GetType() == REFMERGE) { + has_merge_out = true; + break; + } + } + if (has_merge_out) { + continue; + } + + GELOGI("Unlink control edge from %s to %s.", node->GetName().c_str(), out_ctrl_node->GetName().c_str()); + GE_CHK_STATUS_RET(out_ctrl_anchor->Unlink(out_ctrl_node->GetInControlAnchor())) + for (auto &out_node_of_const : out_nodes_of_const) { + if (!out_ctrl_anchor->IsLinkedWith(out_node_of_const->GetInControlAnchor())) { + GELOGI("Link control edge from %s to %s.", node->GetName().c_str(), out_node_of_const->GetName().c_str()); + GE_CHK_STATUS_RET(out_ctrl_anchor->LinkTo(out_node_of_const->GetInControlAnchor())) + } + } + } + return SUCCESS; +} } // namespace ge diff --git a/ge/graph/passes/enter_pass.h b/ge/graph/passes/enter_pass.h index 677516ff..1417b1f0 100644 --- a/ge/graph/passes/enter_pass.h +++ b/ge/graph/passes/enter_pass.h @@ -25,7 +25,8 @@ class EnterPass : public BaseNodePass { Status Run(NodePtr &node) override; private: - Status OptimizeEnter(NodePtr &node, NodePtr &in_node); + Status OptimizeEnterWithOnlyDataOut(NodePtr &node, NodePtr &in_node); + Status UnlinkCtrlEdgeBeforeConst(NodePtr &node); }; } // namespace ge #endif // GE_GRAPH_PASSES_ENTER_PASS_H_ diff --git a/ge/graph/passes/folding_pass.cc b/ge/graph/passes/folding_pass.cc index 93dc2c40..227a0f61 100644 --- a/ge/graph/passes/folding_pass.cc +++ b/ge/graph/passes/folding_pass.cc @@ -173,10 +173,7 @@ Status FoldingPass::DealWithInNodes(NodePtr &node) { continue; } auto in_node = in_node_anchor->GetOwnerNode(); - if (in_node == nullptr) { - continue; - } - if ((in_node->GetType() == SWITCH) || (in_node->GetType() == REFSWITCH)) { + if ((in_node->GetType() == SWITCH) || (in_node->GetType() == REFSWITCH) || (in_node->GetType() == SWITCHN)) { GELOGI("The in_node name is %s, and node type is %s.", in_node->GetName().c_str(), in_node->GetType().c_str()); auto ret = in_node_anchor->Unlink(in_data_anchor); if (ret != SUCCESS) { diff --git a/ge/graph/passes/merge_to_stream_merge_pass.cc b/ge/graph/passes/merge_to_stream_merge_pass.cc index 103fbb1b..c1a57a61 100644 --- a/ge/graph/passes/merge_to_stream_merge_pass.cc +++ b/ge/graph/passes/merge_to_stream_merge_pass.cc @@ -89,16 +89,6 @@ Status MergeToStreamMergePass::ReplaceMergeNode(const ComputeGraphPtr &graph, co GE_CHK_STATUS_RET(SetNextIteration(stream_merge, next_iteration_name), "Set next iteration failed"); } - if (merge_op_desc->HasAttr(ATTR_NAME_BATCH_LABEL)) { - string batch_label; - (void)AttrUtils::GetStr(merge_op_desc, ATTR_NAME_BATCH_LABEL, batch_label); - if (!batch_label.empty()) { - auto stream_merge_desc = stream_merge->GetOpDesc(); - GE_CHECK_NOTNULL(stream_merge_desc); - (void)AttrUtils::SetStr(stream_merge_desc, ATTR_NAME_BATCH_LABEL, batch_label); - } - } - return AddActiveNodes(graph, stream_merge); } diff --git a/ge/graph/passes/multi_batch_clone_pass.cc b/ge/graph/passes/multi_batch_clone_pass.cc index 87d9749a..86ea043e 100644 --- a/ge/graph/passes/multi_batch_clone_pass.cc +++ b/ge/graph/passes/multi_batch_clone_pass.cc @@ -503,12 +503,24 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) { /// /// @ingroup ge -/// @brief Set shape to Data node in branch. -/// @param [in] const NodePtr &data: data in branch. +/// @brief Update Data node in Subgraph. +/// @param [in] const NodePtr &data: data in Subgraph. /// @param [in] size_t index: The batch index. /// @return 0: SUCCESS / others: FAILED /// -Status MultiBatchClonePass::UpdateShapeToData(const NodePtr &data, size_t index) { +Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index) { + int node_index = -1; + if (!AttrUtils::GetInt(data->GetOpDesc(), ATTR_NAME_INDEX, node_index)) { + GELOGE(FAILED, "Failed to get index from data[%s]", data->GetName().c_str()); + return FAILED; + } + + int parent_index = node_index + 1; + if (!AttrUtils::SetInt(data->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { + GELOGE(FAILED, "Failed to set parent index for node %s", data->GetName().c_str()); + return FAILED; + } + auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); const auto &dims = data_shape.GetDims(); if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { @@ -580,13 +592,15 @@ Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const subgraph->SetParentGraph(graph); graph->AddSubgraph(subgraph->GetName(), subgraph); all_branch_output_[subgraph] = subgraph->FindFirstNodeMatchType(NETOUTPUT); + GE_CHK_STATUS_RET(UpdateSubgraphOutput(all_branch_output_[subgraph]), + "Update %s failed", all_branch_output_[subgraph]->GetName().c_str()); const string key_name = "branches" + std::to_string(i); op_desc->AddSubgraphName(key_name); op_desc->SetSubgraphInstanceName(i, subgraph->GetName()); for (const auto &data : input_nodes) { - GE_CHK_STATUS_RET(UpdateShapeToData(data, i), "Update %s failed", subgraph->GetName().c_str()); + GE_CHK_STATUS_RET(UpdateSubgraphData(data, i), "Update %s failed", subgraph->GetName().c_str()); } } @@ -595,55 +609,27 @@ Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const const auto &op_desc = n->GetOpDesc(); op_desc->SetName(n->GetName() + kMultiBatchNodePostfix + "0"); if (n->GetType() == DATA) { - GE_CHK_STATUS_RET(UpdateShapeToData(n, 0), "Update %s failed", branch->GetName().c_str()); + GE_CHK_STATUS_RET(UpdateSubgraphData(n, 0), "Update %s failed", branch->GetName().c_str()); } } - return PostProcSubgraph(graph); + return SUCCESS; } /// /// @ingroup ge -/// @brief Assign parent index for branches. -/// @param [in] const ComputeGraphPtr &graph: Root/Case graph. +/// @brief Update output_node in Subgraph. +/// @param [in] const NodePtr &output_node: output_node in Subgraph. /// @return 0: SUCCESS / others: FAILED /// -Status MultiBatchClonePass::PostProcSubgraph(const ComputeGraphPtr &graph) { - auto func_desc = case_node_->GetOpDesc(); - domi::ParseSubgraphFuncV2 parse_func_v2 = nullptr; - auto post_func = domi::OpRegistry::Instance()->GetParseSubgraphPostFunc(func_desc->GetType()); - if (post_func == nullptr) { - GELOGW("The subgraph post func for node %s type %s is null.", case_node_->GetName().c_str(), - case_node_->GetType().c_str()); - if (domi::OpRegistry::Instance()->GetParseSubgraphPostFunc(func_desc->GetType(), parse_func_v2) != SUCCESS || - parse_func_v2 == nullptr) { - GELOGW("The subgraph new post func v2 for node %s type %s is null", case_node_->GetName().c_str(), - case_node_->GetType().c_str()); - return FAILED; - } - } - - for (const auto &name : func_desc->GetSubgraphInstanceNames()) { - const auto &subgraph = graph->GetSubgraph(name); - if (subgraph == nullptr) { - GELOGE(FAILED, "Subgraph not found, name: %s", name.c_str()); - return FAILED; - } - - std::string subgraph_name; - GE_CHK_STATUS_RET(func_desc->GetSubgraphNameByInstanceName(subgraph->GetName(), subgraph_name), - "Subgraph: %s get subgraph name failed.", subgraph->GetName().c_str()); - - auto graph = GraphUtils::CreateGraphFromComputeGraph(subgraph); - Status ret = FAILED; - if (post_func != nullptr) { - ret = post_func(subgraph_name, graph); - } else if (parse_func_v2 != nullptr) { - ret = parse_func_v2(subgraph_name.c_str(), graph); - } - if (ret != SUCCESS) { - GELOGE(FAILED, "Failed to post-process subgraph %s on node %s type %s", graph.GetName().c_str(), - case_node_->GetName().c_str(), case_node_->GetType().c_str()); +Status MultiBatchClonePass::UpdateSubgraphOutput(const NodePtr &output_node) { + const auto &op_desc = output_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + for (size_t index = 0; index < op_desc->GetInputsSize(); ++index) { + GeTensorDescPtr tensor = op_desc->MutableInputDesc(index); + GE_CHECK_NOTNULL(tensor); + if (!AttrUtils::SetInt(tensor, ATTR_NAME_PARENT_NODE_INDEX, index)) { + GELOGE(FAILED, "Failed to set parent index for node %s", output_node->GetName().c_str()); return FAILED; } } diff --git a/ge/graph/passes/multi_batch_clone_pass.h b/ge/graph/passes/multi_batch_clone_pass.h index 1155dfc8..ee137b5a 100644 --- a/ge/graph/passes/multi_batch_clone_pass.h +++ b/ge/graph/passes/multi_batch_clone_pass.h @@ -105,12 +105,20 @@ class MultiBatchClonePass : public GraphPass { /// /// @ingroup ge - /// @brief Set shape to Data node in branch. - /// @param [in] const NodePtr &data: data in branch. + /// @brief Update Data node in Subgraph. + /// @param [in] const NodePtr &data: data in Subgraph. /// @param [in] size_t index: The batch index. /// @return 0: SUCCESS / others: FAILED /// - Status UpdateShapeToData(const NodePtr &data, size_t index); + Status UpdateSubgraphData(const NodePtr &data, size_t index); + + /// + /// @ingroup ge + /// @brief Update output_node in Subgraph. + /// @param [in] const NodePtr &output_node: output_node in Subgraph. + /// @return 0: SUCCESS / others: FAILED + /// + Status UpdateSubgraphOutput(const NodePtr &output_node); /// /// @ingroup ge @@ -133,14 +141,6 @@ class MultiBatchClonePass : public GraphPass { /// /// @ingroup ge - /// @brief Assign parent index for branches. - /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. - /// @return 0: SUCCESS / others: FAILED - /// - Status PostProcSubgraph(const ComputeGraphPtr &graph); - - /// - /// @ingroup ge /// @brief Remove subgraph supend output anchor. /// @param [in] ComputeGraphPtr &graph: Parent compute graph. /// @return 0: SUCCESS / others: FAILED diff --git a/ge/graph/passes/next_iteration_pass.cc b/ge/graph/passes/next_iteration_pass.cc index d8c4779d..cf46f09d 100644 --- a/ge/graph/passes/next_iteration_pass.cc +++ b/ge/graph/passes/next_iteration_pass.cc @@ -19,6 +19,8 @@ #include "common/ge/ge_util.h" #include "graph/common/omg_util.h" +using std::string; + namespace ge { Status NextIterationPass::Run(ComputeGraphPtr graph) { GELOGD("NextIterationPass Enter"); @@ -35,10 +37,6 @@ Status NextIterationPass::Run(ComputeGraphPtr graph) { return INTERNAL_ERROR; } } - if (GroupWithNoBatch(graph) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Group enter_nodes failed without batch_label attr."); - return INTERNAL_ERROR; - } if (FindWhileGroups() != SUCCESS) { GELOGE(INTERNAL_ERROR, "Find while groups failed."); @@ -73,75 +71,22 @@ Status NextIterationPass::GroupEnterNode(const NodePtr &enter_node) { return FAILED; } - std::string batch_label; - (void)ge::AttrUtils::GetStr(enter_desc, ATTR_NAME_BATCH_LABEL, batch_label); - if (batch_label.empty()) { - auto frame_iter = frame_enter_map_.find(frame_name); - if (frame_iter == frame_enter_map_.end()) { - std::vector enter_nodes; - enter_nodes.emplace_back(enter_node); - frame_enter_map_[frame_name] = enter_nodes; - } else { - frame_iter->second.emplace_back(enter_node); - } - return SUCCESS; + string batch_label; + if (ge::AttrUtils::GetStr(enter_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { + frame_name += batch_label; } - auto group_iter = loop_group_map_.find(frame_name); - if (group_iter == loop_group_map_.end()) { + auto iter = loop_group_map_.find(frame_name); + if (iter == loop_group_map_.end()) { LoopCondGroupPtr loop_group = MakeShared(); if (loop_group == nullptr) { GELOGE(FAILED, "MakeShared for LoopCondGroup failed."); return FAILED; } loop_group->enter_nodes.emplace_back(enter_node); - loop_group_map_[frame_name][batch_label] = loop_group; + loop_group_map_[frame_name] = loop_group; } else { - auto batch_iter = group_iter->second.find(batch_label); - if (batch_iter == group_iter->second.end()) { - LoopCondGroupPtr loop_group = MakeShared(); - if (loop_group == nullptr) { - GELOGE(FAILED, "MakeShared for LoopCondGroup failed."); - return FAILED; - } - loop_group->enter_nodes.emplace_back(enter_node); - group_iter->second[batch_label] = loop_group; - } else { - batch_iter->second->enter_nodes.emplace_back(enter_node); - } - } - - return SUCCESS; -} - -/// -/// @brief Group Enter nodes without batch_label attr -/// @param [in] compute_graph -/// @return Status -/// -Status NextIterationPass::GroupWithNoBatch(const ComputeGraphPtr &graph) { - if (frame_enter_map_.empty()) { - GELOGI("All enter nodes in graph %s has batch_label attr.", graph->GetName().c_str()); - return SUCCESS; - } - for (const auto &item : frame_enter_map_) { - const std::string &frame_name = item.first; - auto iter = loop_group_map_.find(frame_name); - if (iter == loop_group_map_.end()) { - LoopCondGroupPtr loop_group = MakeShared(); - if (loop_group == nullptr) { - GELOGE(FAILED, "MakeShared for LoopCondGroup failed."); - return FAILED; - } - loop_group->enter_nodes = item.second; - loop_group_map_[frame_name][""] = loop_group; - } else { - for (auto &batch_item : iter->second) { - for (const auto &enter_node : item.second) { - batch_item.second->enter_nodes.emplace_back(enter_node); - } - } - } + iter->second->enter_nodes.emplace_back(enter_node); } return SUCCESS; @@ -154,55 +99,39 @@ Status NextIterationPass::GroupWithNoBatch(const ComputeGraphPtr &graph) { Status NextIterationPass::FindWhileGroups() { for (const auto &loop_group_iter : loop_group_map_) { const std::string &frame_name = loop_group_iter.first; - for (const auto &batch_iter : loop_group_iter.second) { - const std::string &batch_label = batch_iter.first; - for (const auto &enter_node : batch_iter.second->enter_nodes) { - for (const auto &out_node : enter_node->GetOutAllNodes()) { - GELOGI("Find while_group for enter_node %s, frame_name:%s, batch_label:%s.", enter_node->GetName().c_str(), - frame_name.c_str(), batch_label.c_str()); - if ((out_node->GetType() != MERGE) && (out_node->GetType() != REFMERGE)) { - continue; - } - std::string tmp_label; - GE_CHECK_NOTNULL(out_node->GetOpDesc()); - (void)AttrUtils::GetStr(out_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, tmp_label); - bool need_skip = !(batch_label.empty() || tmp_label.empty() || (batch_label == tmp_label)); - if (need_skip) { - continue; - } - - NodePtr next_node = nullptr; - if (FindTargetNode(out_node, NEXTITERATION, true, batch_label, next_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, - "Get NextIteration node failed: inputs of Merge should be Enter/NextIteration, current_Merge=%s", - out_node->GetName().c_str()); - return INTERNAL_ERROR; - } - batch_iter.second->merge_next_pairs.emplace_back(std::make_pair(out_node, next_node)); - - NodePtr switch_node = nullptr; - if (FindTargetNode(out_node, SWITCH, false, batch_label, switch_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get Switch node failed: output of Merge should be Switch, current_Merge=%s", - out_node->GetName().c_str()); - return INTERNAL_ERROR; - } - if (switch_node == nullptr) { - continue; - } - - NodePtr loop_cond = nullptr; - if (FindTargetNode(switch_node, LOOPCOND, true, batch_label, loop_cond) != SUCCESS) { - GELOGE(INTERNAL_ERROR, - "Get LoopCond node failed: pred input of Switch should be LoopCond, current_Switch=%s", - switch_node->GetName().c_str()); - return INTERNAL_ERROR; - } - if (batch_iter.second->loop_cond == nullptr) { - batch_iter.second->loop_cond = loop_cond; - } else if (batch_iter.second->loop_cond != loop_cond) { - GELOGE(FAILED, "Multi LoopCond nodes exist."); - return FAILED; - } + for (const auto &enter_node : loop_group_iter.second->enter_nodes) { + for (const auto &out_node : enter_node->GetOutAllNodes()) { + const string &type = out_node->GetType(); + if ((type != MERGE) && (type != REFMERGE)) { + continue; + } + + NodePtr next_node = nullptr; + if (FindTargetNode(out_node, NEXTITERATION, true, next_node) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Get NextIteration node failed, frame_name: %s", frame_name.c_str()); + return INTERNAL_ERROR; + } + loop_group_iter.second->merge_next_pairs.emplace_back(std::make_pair(out_node, next_node)); + + NodePtr switch_node = nullptr; + if (FindTargetNode(out_node, SWITCH, false, switch_node) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Get Switch node failed, frame_name: %s.", frame_name.c_str()); + return INTERNAL_ERROR; + } + if (switch_node == nullptr) { + continue; + } + + NodePtr loop_cond = nullptr; + if (FindTargetNode(switch_node, LOOPCOND, true, loop_cond) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Get LoopCond node failed, frame_name: %s.", frame_name.c_str()); + return INTERNAL_ERROR; + } + if (loop_group_iter.second->loop_cond == nullptr) { + loop_group_iter.second->loop_cond = loop_cond; + } else if (loop_group_iter.second->loop_cond != loop_cond) { + GELOGE(FAILED, "Multi LoopCond nodes exist, frame_name: %s.", frame_name.c_str()); + return FAILED; } } } @@ -223,18 +152,16 @@ bool NextIterationPass::VerifyWhileGroup() { GELOGE(INTERNAL_ERROR, "Verify while group failed, frame_name is empty."); return false; } - for (const auto &batch_iter : loop_group_iter.second) { - if (batch_iter.second->loop_cond == nullptr) { - GELOGE(INTERNAL_ERROR, "Verify while group failed, LoopCond is null, frame_name: %s.", frame_name.c_str()); - return false; - } + if (loop_group_iter.second->loop_cond == nullptr) { + GELOGE(INTERNAL_ERROR, "Verify while group failed, LoopCond is null, frame_name: %s.", frame_name.c_str()); + return false; + } - for (const auto &pair_iter : batch_iter.second->merge_next_pairs) { - if ((pair_iter.first == nullptr) || (pair_iter.second == nullptr)) { - GELOGE(INTERNAL_ERROR, "Verify while group failed, merge_node/next_node is null, frame_name: %s.", - frame_name.c_str()); - return false; - } + for (const auto &pair_iter : loop_group_iter.second->merge_next_pairs) { + if ((pair_iter.first == nullptr) || (pair_iter.second == nullptr)) { + GELOGE(INTERNAL_ERROR, "Verify while group failed, merge_node/next_node is null, frame_name: %s.", + frame_name.c_str()); + return false; } } } @@ -249,56 +176,53 @@ bool NextIterationPass::VerifyWhileGroup() { /// Status NextIterationPass::HandleWhileGroup(ComputeGraphPtr &graph) { for (const auto &loop_cond_iter : loop_group_map_) { - for (const auto &batch_iter : loop_cond_iter.second) { - const std::string &cond_name = batch_iter.second->loop_cond->GetName(); - GELOGI("Handle while group, LoopCond node: %s.", cond_name.c_str()); - - // Create Active node, Enter->Active->Merge, NextIteration->Active->Merge - NodePtr enter_active = CreateActiveNode(graph, cond_name + "_Enter_" + STREAMACTIVE); - NodePtr next_active = CreateActiveNode(graph, cond_name + "_Next_" + STREAMACTIVE); - if ((enter_active == nullptr) || (next_active == nullptr)) { - GELOGE(INTERNAL_ERROR, "Create active node failed, cond_name: %s.", cond_name.c_str()); + const std::string &cond_name = loop_cond_iter.second->loop_cond->GetName(); + GELOGI("Handle while group, LoopCond node: %s.", cond_name.c_str()); + + // Create Active node, Enter->Active->Merge, NextIteration->Active->Merge + NodePtr enter_active = CreateActiveNode(graph, cond_name + "_Enter_" + STREAMACTIVE); + NodePtr next_active = CreateActiveNode(graph, cond_name + "_Next_" + STREAMACTIVE); + if ((enter_active == nullptr) || (next_active == nullptr)) { + GELOGE(INTERNAL_ERROR, "Create active node failed, cond_name: %s.", cond_name.c_str()); + return INTERNAL_ERROR; + } + + for (const auto &enter_node : loop_cond_iter.second->enter_nodes) { + // Enter --> Active + if (GraphUtils::AddEdge(enter_node->GetOutControlAnchor(), enter_active->GetInControlAnchor()) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add control edge from %s to %s failed.", enter_node->GetName().c_str(), + enter_active->GetName().c_str()); return INTERNAL_ERROR; } + } - for (const auto &enter_node : batch_iter.second->enter_nodes) { - // Enter --> Active - if (GraphUtils::AddEdge(enter_node->GetOutControlAnchor(), enter_active->GetInControlAnchor()) != - GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add control edge failed."); - return INTERNAL_ERROR; - } + for (const auto &pair : loop_cond_iter.second->merge_next_pairs) { + NodePtr merge_node = pair.first; + NodePtr next_node = pair.second; + // Active --> Merge + if (GraphUtils::AddEdge(enter_active->GetOutControlAnchor(), merge_node->GetInControlAnchor()) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add control edge failed."); + return INTERNAL_ERROR; } - for (const auto &pair : batch_iter.second->merge_next_pairs) { - NodePtr merge_node = pair.first; - NodePtr next_node = pair.second; - // Active --> Merge - if (GraphUtils::AddEdge(enter_active->GetOutControlAnchor(), merge_node->GetInControlAnchor()) != - GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add control edge failed."); - return INTERNAL_ERROR; - } - - // NextIteration --> Active - if (GraphUtils::AddEdge(next_node->GetOutControlAnchor(), next_active->GetInControlAnchor()) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add control edge failed."); - return INTERNAL_ERROR; - } - - // break link between NextIteration and Merge - if (BreakNextIteration(next_node, merge_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Break NextIteration failed"); - return INTERNAL_ERROR; - } + // NextIteration --> Active + if (GraphUtils::AddEdge(next_node->GetOutControlAnchor(), next_active->GetInControlAnchor()) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add control edge failed."); + return INTERNAL_ERROR; } - if ((SetActiveLabelList(enter_active, {cond_name}) != SUCCESS) || - (SetActiveLabelList(next_active, {cond_name}) != SUCCESS)) { - GELOGE(INTERNAL_ERROR, "Set attr ACTIVE_LABEL_LIST failed."); + // break link between NextIteration and Merge + if (BreakNextIteration(next_node, merge_node) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Break NextIteration failed"); return INTERNAL_ERROR; } } + + if ((SetActiveLabelList(enter_active, {cond_name}) != SUCCESS) || + (SetActiveLabelList(next_active, {cond_name}) != SUCCESS)) { + GELOGE(INTERNAL_ERROR, "Set attr ACTIVE_LABEL_LIST failed."); + return INTERNAL_ERROR; + } } return SUCCESS; @@ -365,12 +289,11 @@ Status NextIterationPass::BreakNextIteration(const NodePtr &next_node, NodePtr & /// @param [in] node /// @param [in] target_type /// @param [in] is_input -/// @param [in] batch_label /// @param [out] target_node /// @return Status /// Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string &target_type, bool is_input, - const std::string &batch_label, NodePtr &target_node) { + NodePtr &target_node) { if (node == nullptr) { GELOGE(PARAM_INVALID, "node is null."); return PARAM_INVALID; @@ -387,12 +310,6 @@ Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string } for (const auto &tmp_node : nodes) { - std::string tmp_label; - (void)AttrUtils::GetStr(tmp_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, tmp_label); - bool need_skip = !(batch_label.empty() || tmp_label.empty() || (batch_label == tmp_label)); - if (need_skip) { - continue; - } const std::string type = tmp_node->GetType(); if ((target_type == LOOPCOND) && (type == target_type)) { target_node = tmp_node; @@ -415,7 +332,6 @@ Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string /// @return SUCCESS /// Status NextIterationPass::ClearStatus() { - frame_enter_map_.clear(); loop_group_map_.clear(); return SUCCESS; } diff --git a/ge/graph/passes/next_iteration_pass.h b/ge/graph/passes/next_iteration_pass.h index f8223c20..3266254d 100644 --- a/ge/graph/passes/next_iteration_pass.h +++ b/ge/graph/passes/next_iteration_pass.h @@ -47,13 +47,6 @@ class NextIterationPass : public GraphPass { Status GroupEnterNode(const NodePtr &enter_node); /// - /// @brief Group Enter nodes without batch_label attr - /// @param [in] compute_graph - /// @return Status - /// - Status GroupWithNoBatch(const ComputeGraphPtr &graph); - - /// /// @brief Find while groups /// @return Status /// @@ -97,13 +90,10 @@ class NextIterationPass : public GraphPass { /// @param [out] target_node /// @return Status /// - Status FindTargetNode(const NodePtr &node, const std::string &target_type, bool is_input, - const std::string &batch_label, NodePtr &target_node); + Status FindTargetNode(const NodePtr &node, const std::string &target_type, bool is_input, NodePtr &target_node); - // map> - std::unordered_map> frame_enter_map_; - // map> - std::unordered_map> loop_group_map_; + // map + std::unordered_map loop_group_map_; }; } // namespace ge #endif // GE_GRAPH_PASSES_NEXT_ITERATION_PASS_H_ diff --git a/ge/graph/passes/remove_same_const_pass.cc b/ge/graph/passes/remove_same_const_pass.cc new file mode 100644 index 00000000..e75a4553 --- /dev/null +++ b/ge/graph/passes/remove_same_const_pass.cc @@ -0,0 +1,106 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "remove_same_const_pass.h" + +#include +#include +#include + +#include "common/base64.h" +#include "ge_local_engine/engine/host_cpu_engine.h" +#include "graph/utils/node_utils.h" + +namespace ge { +namespace { +std::string GetCseKey(const NodePtr &node) { + std::stringstream ss; + ss << node->GetType() << "control-inputs-"; + std::set control_in_node_names; + for (auto &src_node : node->GetInControlNodes()) { + control_in_node_names.insert(src_node->GetName()); + } + for (auto &name : control_in_node_names) { + ss << name << "-"; + } + + ss << "attrs-" << AttrUtils::GetAllAttrsStr(node->GetOpDesc()); + + return ss.str(); +} + +bool IsConstType(const NodePtr &node) { return (node->GetType() == CONSTANT || node->GetType() == CONSTANTOP); } +} // namespace +Status RemoveSameConstPass::Run(ComputeGraphPtr graph) { + GELOGD("Begin to run RemoveSameConstPass on the graph"); + GE_CHECK_NOTNULL(graph); + std::map keys_to_node; + for (const auto &node : graph->GetDirectNode()) { + GE_CHECK_NOTNULL(node); + if (!IsConstType(node)) { + continue; + } + bool is_unknown = false; + auto ret = NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown); + if (ret != GRAPH_SUCCESS) { + GELOGW("Get node unknown status failed, node name:%s, type:%s.", + node->GetName().c_str(), node->GetType().c_str()); + continue; + } + if (is_unknown) { + GELOGI("Current node %s, type %s is unknown shape which should be skip.", + node->GetName().c_str(), node->GetType().c_str()); + continue; + } + auto key = GetCseKey(node); + GELOGD("The const node %s cse key %s", node->GetName().c_str(), ge::base64::EncodeToBase64(key).c_str()); + auto iter = keys_to_node.find(key); + if (iter == keys_to_node.end()) { + keys_to_node[key] = node; + continue; + } + + if (node->GetAllOutDataAnchorsSize() != iter->second->GetAllOutDataAnchorsSize()) { + GELOGW("The const node %s and %s have the same CSE key, but different output anchor count, skip to fusion them", + iter->second->GetName().c_str(), node->GetName().c_str()); + continue; + } + + std::vector output_map(node->GetAllOutDataAnchorsSize()); + for (size_t i = 0; i < node->GetAllOutDataAnchorsSize(); ++i) { + output_map[i] = i; + } + + ret = GraphUtils::ReplaceNodeAnchors(iter->second, node, {}, output_map); + if (ret != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to replace node %s by node %s", node->GetName().c_str(), + iter->second->GetName().c_str(), ret); + return INTERNAL_ERROR; + } + + NodeUtils::UnlinkAll(*node); + + ret = GraphUtils::RemoveNodeWithoutRelink(graph, node); + if (ret != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to remove node %s from graph", node->GetName().c_str()); + return INTERNAL_ERROR; + } + + GELOGI("Remove const node %s by RemoveSameConstPass, replace it with node %s", node->GetName().c_str(), + iter->second->GetName().c_str()); + } + return SUCCESS; +} +} // namespace ge diff --git a/ge/graph/passes/remove_same_const_pass.h b/ge/graph/passes/remove_same_const_pass.h new file mode 100644 index 00000000..08905bd2 --- /dev/null +++ b/ge/graph/passes/remove_same_const_pass.h @@ -0,0 +1,28 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef GE_GRAPH_PASSES_REMOVE_SAME_CONST_PASS_H_ +#define GE_GRAPH_PASSES_REMOVE_SAME_CONST_PASS_H_ + +#include "graph/types.h" +#include "inc/graph_pass.h" + +namespace ge { +class RemoveSameConstPass : public GraphPass { + public: + Status Run(ge::ComputeGraphPtr graph) override ; +}; +} // namespace ge +#endif //GE_GRAPH_PASSES_REMOVE_SAME_CONST_PASS_H_ diff --git a/ge/graph/passes/switch_to_stream_switch_pass.cc b/ge/graph/passes/switch_to_stream_switch_pass.cc index a7b922e0..f75a104f 100644 --- a/ge/graph/passes/switch_to_stream_switch_pass.cc +++ b/ge/graph/passes/switch_to_stream_switch_pass.cc @@ -17,8 +17,13 @@ #include "graph/passes/switch_to_stream_switch_pass.h" #include #include "common/ge/ge_util.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" +#include "framework/common/ge_inner_error_codes.h" +#include "framework/common/types.h" #include "ge/ge_api_types.h" #include "graph/common/omg_util.h" +#include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" #include "graph/utils/type_utils.h" @@ -120,13 +125,12 @@ void SwitchToStreamSwitchPass::MarkCycleDependence( if (visited.count(tmp_node) > 0) { continue; } + GELOGD("MarkCycleDependence: tmp_node=%s.", tmp_node->GetName().c_str()); for (const NodePtr &out_node : tmp_node->GetOutAllNodes()) { if (switch_nodes.find(out_node) == switch_nodes.end()) { out_nodes.push(out_node); continue; } - GELOGD("MarkCycleDependence: tmp_node=%s, switch_node=%s.", - tmp_node->GetName().c_str(), out_node->GetName().c_str()); GE_IF_BOOL_EXEC(SetCyclicDependenceFlag(out_node) != SUCCESS, GELOGW("set cyclic dependence attr failed."); return ); auto map_iter = switch_cyclic_map_.find(out_node); @@ -598,7 +602,7 @@ Status SwitchToStreamSwitchPass::AddConstNode(const ComputeGraphPtr &graph, cons /// Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_node, const NodePtr &cast_node, const std::set &same_cond_switch) { - GELOGD("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(), + GELOGI("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(), cast_node->GetName().c_str()); std::string orig_switch_name = switch_node->GetName(); OpDescPtr switch_desc = switch_node->GetOpDesc(); @@ -649,7 +653,7 @@ Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_no /// Status SwitchToStreamSwitchPass::ModifySwitchOutCtlEdges(const NodePtr &switch_node, const NodePtr &stream_switch, const NodePtr &active_node) { - GELOGD("ModifySwitchOutCtlEdges: switch_node=%s, stream_switch=%s, active_node=%s", switch_node->GetName().c_str(), + GELOGI("ModifySwitchOutCtlEdges: switch_node=%s, stream_switch=%s, active_node=%s", switch_node->GetName().c_str(), stream_switch->GetName().c_str(), active_node->GetName().c_str()); auto find_res = switch_node_map_.find(switch_node); GE_IF_BOOL_EXEC(find_res == switch_node_map_.end(), { diff --git a/ge/graph/passes/useless_control_out_remove_pass.cc b/ge/graph/passes/useless_control_out_remove_pass.cc new file mode 100644 index 00000000..4d74d582 --- /dev/null +++ b/ge/graph/passes/useless_control_out_remove_pass.cc @@ -0,0 +1,51 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/passes/useless_control_out_remove_pass.h" + +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/debug/log.h" + +namespace ge { +Status UselessControlOutRemovePass::Run(NodePtr &node) { + GE_CHECK_NOTNULL(node); + + if ((node->GetType() != CONSTANT) && (node->GetType() != CONSTANTOP)) { + return SUCCESS; + } + GELOGD("UselessControlOutRemovePass running, node: %s.", node->GetName().c_str()); + + // const has no control input + if (node->GetInControlNodes().empty()) { + if (node->GetOutDataNodes().empty()) { + // It is an isolated const, just remove it. + GELOGI("Delete isolated const: %s.", node->GetName().c_str()); + GE_CHK_STATUS_RET(IsolateAndDeleteNode(node, {})) + AddNodeDeleted(node); + } else { + auto out_ctrl_anchor = node->GetOutControlAnchor(); + if (out_ctrl_anchor != nullptr && !out_ctrl_anchor->GetPeerAnchors().empty()) { + GELOGI("Node: %s unlink all out control edge.", node->GetName().c_str()); + out_ctrl_anchor->UnlinkAll(); + } + } + } + + return SUCCESS; +} +} // namespace ge \ No newline at end of file diff --git a/ge/graph/passes/useless_control_out_remove_pass.h b/ge/graph/passes/useless_control_out_remove_pass.h new file mode 100644 index 00000000..d84b918f --- /dev/null +++ b/ge/graph/passes/useless_control_out_remove_pass.h @@ -0,0 +1,29 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_PASSES_USELESS_CONTROL_OUT_REMOVE_PASS_H_ +#define GE_GRAPH_PASSES_USELESS_CONTROL_OUT_REMOVE_PASS_H_ + +#include "graph/passes/base_pass.h" + +namespace ge { +class UselessControlOutRemovePass : public BaseNodePass { + public: + Status Run(NodePtr &node) override; +}; +} // namespace ge + +#endif // GE_GRAPH_PASSES_USELESS_CONTROL_OUT_REMOVE_PASS_H_ \ No newline at end of file diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index da862836..2ee5e330 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include "common/formats/format_transfers/format_transfer_fractal_nz.h" #include "common/formats/format_transfers/format_transfer_fractal_z.h" #include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h" @@ -27,9 +28,13 @@ #include "common/helper/model_helper.h" #include "common/math/math_util.h" #include "common/op/ge_op_utils.h" +#include "common/util/error_manager/error_manager.h" +#include "common/formats/utils/formats_trans_utils.h" +#include "framework/common/debug/ge_log.h" #include "graph/common/ge_call_wrapper.h" #include "graph/common/local_context.h" #include "graph/common/transop_util.h" +#include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" #include "graph/shape_refiner.h" #include "graph/manager/graph_var_manager.h" @@ -39,21 +44,29 @@ #include "graph/passes/aicpu_constant_folding_pass.h" #include "graph/passes/assert_pass.h" #include "graph/passes/assign_pass.h" +#include "graph/passes/base_pass.h" #include "graph/passes/common_subexpression_elimination_pass.h" #include "graph/passes/cond_pass.h" #include "graph/passes/cond_remove_pass.h" #include "graph/passes/constant_folding_pass.h" +#include "graph/passes/constant_fuse_same_pass.h" +#include "graph/passes/control_trigger_pass.h" #include "graph/passes/dimension_adjust_pass.h" #include "graph/passes/dimension_compute_pass.h" #include "graph/passes/dropout_pass.h" #include "graph/passes/enter_pass.h" +#include "graph/passes/flow_ctrl_pass.h" #include "graph/passes/for_pass.h" +#include "graph/passes/get_original_format_pass.h" #include "graph/passes/guarantee_const_pass.h" #include "graph/passes/hccl_group_pass.h" #include "graph/passes/hccl_memcpy_pass.h" #include "graph/passes/identity_pass.h" #include "graph/passes/infershape_pass.h" +#include "graph/passes/iterator_op_pass.h" +#include "graph/passes/merge_pass.h" #include "graph/passes/net_output_pass.h" +#include "graph/passes/next_iteration_pass.h" #include "graph/passes/no_use_reshape_remove_pass.h" #include "graph/passes/parallel_concat_start_op_pass.h" #include "graph/passes/placeholder_with_default_pass.h" @@ -68,18 +81,45 @@ #include "graph/passes/shape_operate_op_remove_pass.h" #include "graph/passes/snapshot_pass.h" #include "graph/passes/stop_gradient_pass.h" +#include "graph/passes/subgraph_pass.h" +#include "graph/passes/switch_data_edges_bypass.h" +#include "graph/passes/switch_dead_branch_elimination.h" +#include "graph/passes/switch_logic_remove_pass.h" +#include "graph/passes/merge_to_stream_merge_pass.h" +#include "graph/passes/switch_to_stream_switch_pass.h" +#include "graph/passes/attach_stream_label_pass.h" #include "graph/passes/unused_const_pass.h" +#include "graph/passes/unused_op_remove_pass.h" #include "graph/passes/var_is_initialized_op_pass.h" #include "graph/passes/variable_prepare_op_pass.h" #include "graph/preprocess/insert_op/util_insert_aipp_op.h" +#include "graph/types.h" +#include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" #include "inc/pass_manager.h" #include "init/gelib.h" #include "multi_batch_copy_graph.h" +#include "runtime/dev.h" +#include "graph/passes/dimension_adjust_pass.h" +#include "graph/passes/link_gen_mask_nodes_pass.h" +#include "graph/passes/permute_pass.h" +#include "graph/passes/reshape_remove_pass.h" +#include "graph/passes/same_transdata_breadth_fusion_pass.h" +#include "graph/passes/transop_breadth_fusion_pass.h" +#include "graph/passes/transop_depth_fusion_pass.h" +#include "graph/passes/transop_nearby_allreduce_fusion_pass.h" + +#include "graph/passes/cast_remove_pass.h" #include "graph/passes/data_pass.h" +#include "graph/passes/transop_without_reshape_fusion_pass.h" +#include "graph/passes/transpose_transdata_pass.h" +#include "graph/passes/variable_op_pass.h" +#include "graph/passes/variable_prepare_op_pass.h" +#include "graph/passes/variable_ref_delete_op_pass.h" #include "graph/passes/mark_agnostic_pass.h" + namespace ge { namespace { static std::map output_type_str_to_datatype = { diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index a90f145e..16535a59 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -44,6 +44,8 @@ using std::set; using std::string; using std::vector; +using std::map; +using std::queue; namespace ge { namespace multibatch { @@ -57,10 +59,15 @@ const int kDataInIndex = 0; const int kMergeDataOutIndex = 0; const int kStaticOutput = -1; const int kDivisionConst = 2; +const int32_t kOneInDataNode = 1; +const int32_t kFindNoMatch = 0; inline bool IsDataLikeType(const std::string &node_type) { return (node_type == DATA) || (node_type == AIPP); } +inline bool IsEnterType(const string &node_type) { return (node_type == ENTER) || (node_type == REFENTER); } +const set unchange_types({CONSTANT, CONSTANTOP, ENTER, REFENTER}); + inline bool IsGetNextType(const NodePtr &node) { std::string original_type; GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS, @@ -218,12 +225,6 @@ Status MultiBatchGraphCopyer::CopyGraph() { return ret; } - ret = InsertIdentityAfterSwitchN(); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to insert identity nodes after switchn node."); - return INTERNAL_ERROR; - } - GELOGI("Begin to remove useless nodes by prune pass after copy process"); PrunePass prune_pass; ret = prune_pass.Run(graph_); @@ -240,6 +241,18 @@ Status MultiBatchGraphCopyer::Init() { return ret; } + ret = RelinkConstCtrlEdge(); + if (ret != SUCCESS) { + GELOGE(FAILED, "Relink const's control edge failed."); + return FAILED; + } + + ret = ExtractUnchangedStructureOutofCycle(); + if (ret != SUCCESS) { + GELOGE(FAILED, "Extract unchanged structure out of cycle failed."); + return FAILED; + } + for (auto &node : graph_->GetAllNodes()) { origin_all_nodes_.emplace_back(node); if (IsDataLikeType(node->GetType())) { @@ -252,6 +265,281 @@ Status MultiBatchGraphCopyer::Init() { return SUCCESS; } +Status MultiBatchGraphCopyer::RelinkConstCtrlEdge() { + for (auto &node : graph_->GetAllNodes()) { + GE_CHECK_NOTNULL(node); + if ((node->GetType() == CONSTANT) || (node->GetType() == CONSTANTOP)) { + if (node->GetOutDataNodes().empty()) { + continue; + } + if (!node->GetInControlNodes().empty()) { + auto in_ctrl_nodes = node->GetInControlNodes(); + auto out_nodes = node->GetOutAllNodes(); + bool has_merge_out = false; + for (const auto &out_node : out_nodes) { + GE_CHECK_NOTNULL(out_node); + if (out_node->GetType() == MERGE || out_node->GetType() == REFMERGE) { + has_merge_out = true; + break; + } + } + if (has_merge_out) { + continue; + } + auto in_ctrl_anchor = node->GetInControlAnchor(); + GE_CHECK_NOTNULL(in_ctrl_anchor); + in_ctrl_anchor->UnlinkAll(); + for (auto &in_ctrl_node : in_ctrl_nodes) { + auto out_ctrl_anchor_of_in_ctrl_node = in_ctrl_node->GetOutControlAnchor(); + GE_CHECK_NOTNULL(out_ctrl_anchor_of_in_ctrl_node); + for (auto &out_node : out_nodes) { + if (IsEnterType(out_node->GetType())) { + continue; + } + if (!out_ctrl_anchor_of_in_ctrl_node->IsLinkedWith(out_node->GetInControlAnchor())) { + GE_CHK_STATUS_RET(out_ctrl_anchor_of_in_ctrl_node->LinkTo(out_node->GetInControlAnchor())) + } + } + } + } + auto out_ctrl_anchor = node->GetOutControlAnchor(); + if (out_ctrl_anchor != nullptr) { + out_ctrl_anchor->UnlinkAll(); + } + } + } + + return SUCCESS; +} + +Status MultiBatchGraphCopyer::ExtractUnchangedStructureOutofCycle() { + map> frame_enter; + if (GetEnterNodesGroupByFrame(frame_enter) != SUCCESS) { + GELOGE(FAILED, "Get enter nodes grouped by frame_name failed."); + return FAILED; + } + + queue nodes_to_extract; + if (GetNodeNeedExtract(frame_enter, nodes_to_extract) != SUCCESS) { + GELOGE(FAILED, "Get nodes needed to extract failed."); + return FAILED; + } + + while (!nodes_to_extract.empty()) { + auto node = nodes_to_extract.front(); + nodes_to_extract.pop(); + OpDescPtr enter_desc = nullptr; + if (MoveInEntersInDataAnchorDown(node, enter_desc) != SUCCESS) { + GELOGE(FAILED, "Move in enter nodes' in data anchors down of %s failed.", node->GetName().c_str()); + return FAILED; + } + set out_nodes; + if (InsertEnterAfterNode(node, enter_desc, out_nodes) != SUCCESS) { + GELOGE(FAILED, "Insert enter node after %s failed.", node->GetName().c_str()); + return FAILED; + } + + if (MoveCtrlEdgeToOutNodes(node, out_nodes) != SUCCESS) { + GELOGE(FAILED, "Move %s's control edge to out nodes failed.", node->GetName().c_str()); + return FAILED; + } + + for (auto &out_node : out_nodes) { + GE_CHECK_NOTNULL(out_node); + if (AllInDataNodesUnchangeAndNoMergeOut(out_node)) { + nodes_to_extract.push(out_node); + } + } + } + + if (DeleteEnterWithoutDataOut() != SUCCESS) { + GELOGE(FAILED, "Delete enter node without out data nodes failed."); + return FAILED; + } + + return SUCCESS; +} + +Status MultiBatchGraphCopyer::GetEnterNodesGroupByFrame(map> &frame_enter) { + for (auto &node : graph_->GetAllNodes()) { + GE_CHECK_NOTNULL(node); + if (IsEnterType(node->GetType())) { + if (!node->GetInControlNodes().empty() || !node->GetOutControlNodes().empty()) { + continue; + } + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + string frame_name; + if (!AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) { + GELOGE(FAILED, "Get attr frame_name of enter[%] failed.", node->GetName().c_str()); + return FAILED; + } + frame_enter[frame_name].emplace_back(node); + } + } + + return SUCCESS; +} + +Status MultiBatchGraphCopyer::GetNodeNeedExtract(const map> &frame_enter, + queue &nodes_to_extract) { + for (const auto &one_group : frame_enter) { + auto enters = one_group.second; + for (const auto &enter : enters) { + auto out_data_nodes = enter->GetOutDataNodes(); + for (const auto &out_data_node : out_data_nodes) { + GE_CHECK_NOTNULL(out_data_node); + if (AllInDataNodesUnchangeAndNoMergeOut(out_data_node)) { + nodes_to_extract.push(out_data_node); + } + } + } + } + + return SUCCESS; +} + +bool MultiBatchGraphCopyer::AllInDataNodesUnchangeAndNoMergeOut(const NodePtr &node) { + auto out_data_nodes = node->GetOutDataNodes(); + for (const auto &out_data_node : out_data_nodes) { + if (out_data_node == nullptr) { + return false; + } + + if (out_data_node->GetType() == MERGE || out_data_node->GetType() == REFMERGE) { + return false; + } + } + + auto in_data_nodes = node->GetInDataNodes(); + if (in_data_nodes.size() == kOneInDataNode) { + return true; + } + + for (const auto &in_data_node : in_data_nodes) { + if (in_data_node == nullptr) { + return false; + } + if (unchange_types.count(in_data_node->GetType()) == kFindNoMatch) { + return false; + } + } + + return true; +} + +Status MultiBatchGraphCopyer::MoveInEntersInDataAnchorDown(NodePtr &node, OpDescPtr &enter_desc) { + auto in_data_anchors = node->GetAllInDataAnchors(); + for (auto &in_data_anchor : in_data_anchors) { + auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(peer_out_data_anchor); + auto peer_in_data_node = peer_out_data_anchor->GetOwnerNode(); + if (IsEnterType(peer_in_data_node->GetType())) { + GE_CHK_STATUS_RET(peer_out_data_anchor->Unlink(in_data_anchor)) + GELOGD("Unlink data edge from %s to %s.", peer_in_data_node->GetName().c_str(), node->GetName().c_str()); + auto enter_in_data_anchors = peer_in_data_node->GetAllInDataAnchors(); + for (auto &enter_in_data_anchor : enter_in_data_anchors) { + auto peer_out_data_anchor_of_enter = enter_in_data_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(peer_out_data_anchor_of_enter); + if (peer_out_data_anchor_of_enter->IsLinkedWith(in_data_anchor)) { + continue; + } + GE_CHK_STATUS_RET(peer_out_data_anchor_of_enter->LinkTo(in_data_anchor)) + GELOGD("Relink data edge from %s to %s.", peer_out_data_anchor_of_enter->GetOwnerNode()->GetName().c_str(), + node->GetName().c_str()); + } + enter_desc = peer_in_data_node->GetOpDesc(); + GE_CHECK_NOTNULL(enter_desc); + } + } + + return SUCCESS; +} + +Status MultiBatchGraphCopyer::InsertEnterAfterNode(NodePtr &node, const OpDescPtr ©_desc, set &out_nodes) { + if (copy_desc == nullptr) { + return SUCCESS; + } + map>> outanchors_inanchors_nodes; + auto out_data_anchors = node->GetAllOutDataAnchors(); + for (auto &out_data_anchor : out_data_anchors) { + auto peer_in_data_anchors = out_data_anchor->GetPeerInDataAnchors(); + for (auto peer_in_data_anchor : peer_in_data_anchors) { + GE_CHECK_NOTNULL(peer_in_data_anchor); + auto peer_in_data_node = peer_in_data_anchor->GetOwnerNode(); + out_nodes.emplace(peer_in_data_node); + outanchors_inanchors_nodes[out_data_anchor].emplace_back(std::make_pair(peer_in_data_anchor, peer_in_data_node)); + } + } + + int32_t i = 0; + auto node_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(node_desc); + // Insert one enter node after node's per out data anchor + for (auto &outanchor_inanchors_nodes : outanchors_inanchors_nodes) { + string name = node->GetName() + "_" + ENTER + "_" + std::to_string(i++); + GELOGD("Create Enter op %s after %s.", name.c_str(), node->GetName().c_str()); + auto enter_desc = AttrUtils::CopyOpDesc(copy_desc); + enter_desc->SetName(name); + GE_CHK_STATUS_RET( + enter_desc->UpdateInputDesc("x", node_desc->GetOutputDesc(outanchor_inanchors_nodes.first->GetIdx()))) + GE_CHK_STATUS_RET( + enter_desc->UpdateOutputDesc("y", node_desc->GetOutputDesc(outanchor_inanchors_nodes.first->GetIdx()))) + auto enter_node = graph_->AddNode(enter_desc); + GE_CHECK_NOTNULL(enter_node); + GE_CHK_STATUS_RET(outanchor_inanchors_nodes.first->LinkTo(enter_node->GetInDataAnchor(kDataInIndex))) + GE_CHECK_NOTNULL(enter_node->GetOutDataAnchor(kDataInIndex)); + for (auto &inanchor_node : outanchor_inanchors_nodes.second) { + GE_CHK_STATUS_RET(outanchor_inanchors_nodes.first->Unlink(inanchor_node.first)) + GE_CHK_STATUS_RET(enter_node->GetOutDataAnchor(kDataInIndex)->LinkTo(inanchor_node.first)) + GELOGD("Unlink from %s to %s, link from %s to %s then to %s.", node->GetName().c_str(), + inanchor_node.second->GetName().c_str(), node->GetName().c_str(), enter_node->GetName().c_str(), + inanchor_node.second->GetName().c_str()); + } + } + + return SUCCESS; +} + +// Move node's in control edges to out data nodes +Status MultiBatchGraphCopyer::MoveCtrlEdgeToOutNodes(NodePtr &node, set &out_nodes) { + auto in_ctrl_anchor = node->GetInControlAnchor(); + GE_CHECK_NOTNULL(in_ctrl_anchor); + auto peer_out_ctrl_anchors = in_ctrl_anchor->GetPeerOutControlAnchors(); + for (auto &peer_out_ctrl_anchor : peer_out_ctrl_anchors) { + GE_CHK_STATUS_RET(peer_out_ctrl_anchor->Unlink(in_ctrl_anchor)) + GELOGD("Unlink control edge from %s to %s.", peer_out_ctrl_anchor->GetOwnerNode()->GetName().c_str(), + node->GetName().c_str()); + for (auto &out_node : out_nodes) { + auto in_ctrl_anchor_of_out_node = out_node->GetInControlAnchor(); + GE_CHECK_NOTNULL(in_ctrl_anchor_of_out_node); + if (!peer_out_ctrl_anchor->IsLinkedWith(in_ctrl_anchor_of_out_node)) { + GE_CHK_STATUS_RET(peer_out_ctrl_anchor->LinkTo(in_ctrl_anchor_of_out_node)) + GELOGD("Link control edge from %s to %s.", peer_out_ctrl_anchor->GetOwnerNode()->GetName().c_str(), + out_node->GetName().c_str()); + } + } + } + + return SUCCESS; +} + +Status MultiBatchGraphCopyer::DeleteEnterWithoutDataOut() { + for (auto &node : graph_->GetAllNodes()) { + GE_CHECK_NOTNULL(node); + if (IsEnterType(node->GetType())) { + auto out_nodes = node->GetOutAllNodes(); + if (out_nodes.empty()) { + GELOGD("Delete enter node: %s which has no output.", node->GetName().c_str()); + GE_CHK_STATUS_RET(GraphUtils::IsolateNode(node, {})) + GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph_, node)) + } + } + } + + return SUCCESS; +} + void MultiBatchGraphCopyer::LabelStatusForData(const NodePtr &data) { auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); GELOGI("Label status for %s, shape_dims is %s.", data->GetName().c_str(), @@ -297,6 +585,9 @@ Status MultiBatchGraphCopyer::LabelInBatchBranchStatus() { LabelStatusForGetNextSink(data); } } + + map> frame_enters; + InitStatus(frame_enters); bool changed = true; // If anyone of in node is kNodeInBatchBranch, it is also kNodeInBatchBranch while (changed) { @@ -306,12 +597,13 @@ Status MultiBatchGraphCopyer::LabelInBatchBranchStatus() { if (iter != origin_nodes_status_.end()) { continue; } - for (auto &in_node : node->GetInAllNodes()) { - bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() && - origin_nodes_status_[in_node.get()] == kNodeInBatchBranch; - if (is_in_batch) { - origin_nodes_status_[node.get()] = kNodeInBatchBranch; - changed = true; + for (auto &in_node : node->GetInDataNodes()) { + if (origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end()) { + if (origin_nodes_status_.find(node.get()) == origin_nodes_status_.end()) { + origin_nodes_status_[node.get()] == kNodeInBatchBranch; + ResetEnterStatus(frame_enters, node); + changed = true; + } break; } } @@ -320,6 +612,45 @@ Status MultiBatchGraphCopyer::LabelInBatchBranchStatus() { return SUCCESS; } +void MultiBatchGraphCopyer::InitStatus(map> &frame_enters) { + for (const auto &node : origin_all_nodes_) { + if (!IsEnterType(node->GetType())) { + continue; + } + auto op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + continue; + } + string frame_name; + if (AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) { + frame_enters[frame_name].emplace_back(node); + } + } + + for (const auto &data : origin_data_nodes_) { + auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); + if (!IsAllDimsPositive(data_shape.GetDims())) { + origin_nodes_status_[data.get()] = kNodeInBatchBranch; + } + } +} + +void MultiBatchGraphCopyer::ResetEnterStatus(map> &frame_enters, const NodePtr &node) { + if (!IsEnterType(node->GetType())) { + return; + } + + for (const auto &frame_enter : frame_enters) { + auto &enters = frame_enter.second; + if (std::find(enters.begin(), enters.end(), node) != enters.end()) { + for (const auto &enter : enters) { + origin_nodes_status_[enter.get()] = kNodeInBatchBranch; + } + break; + } + } +} + Status MultiBatchGraphCopyer::LabelStatus() { if (LabelInBatchBranchStatus() != SUCCESS) { GELOGE(PARAM_INVALID, "Failed to label no in batch branch"); @@ -1360,52 +1691,6 @@ Status MultiBatchGraphCopyer::LinkToNodeOutBranch(const NodePtr &node) { return SUCCESS; } -Status MultiBatchGraphCopyer::InsertIdentityAfterSwitchN() { - for (auto &node : graph_->GetAllNodes()) { - if (node->GetType() != SWITCHN) { - continue; - } - auto switchn_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(switchn_desc); - size_t i = 0; - for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { - for (auto &in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { - auto out_node = in_data_anchor->GetOwnerNode(); - auto op_desc = out_node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - if ((out_node->GetType() == MERGE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) { - GELOGD("No need to insert identity between %s and %s.", node->GetName().c_str(), out_node->GetName().c_str()); - continue; - } - - auto identity_desc = MakeShared(node->GetName() + "_identity_" + std::to_string(i), IDENTITY); - GE_CHECK_NOTNULL(identity_desc); - - string batch_label; - if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { - if (!AttrUtils::SetStr(identity_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { - GELOGE(FAILED, "Set attr ATTR_NAME_BATCH_LABEL failed, node:%s.", identity_desc->GetName().c_str()); - return FAILED; - } - } - - auto data_desc = switchn_desc->GetOutputDesc(i); - i++; - GE_CHK_STATUS_RET(identity_desc->AddInputDesc("x", data_desc)); - GE_CHK_STATUS_RET(identity_desc->AddOutputDesc("y", data_desc)); - - auto identity_node = graph_->AddNode(identity_desc); - GE_CHECK_NOTNULL(identity_node); - GE_CHK_STATUS_RET(out_data_anchor->LinkTo(identity_node->GetInDataAnchor(0))); - GE_CHECK_NOTNULL(identity_node->GetOutControlAnchor()); - GE_CHK_STATUS_RET(identity_node->GetOutControlAnchor()->LinkTo(out_node->GetInControlAnchor())); - } - } - } - - return SUCCESS; -} - Status ProcessMultiBatch(ComputeGraphPtr &graph) { if (GetLocalOmgContext().dynamic_node_type.empty()) { const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN"); @@ -1415,6 +1700,7 @@ Status ProcessMultiBatch(ComputeGraphPtr &graph) { return pass_manager.Run(graph); } } + if (!GetLocalOmgContext().need_multi_batch) { GELOGI("No need to process_multi for no_train graph."); return SUCCESS; diff --git a/ge/graph/preprocess/multi_batch_copy_graph.h b/ge/graph/preprocess/multi_batch_copy_graph.h index a0de4413..d51c4c02 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.h +++ b/ge/graph/preprocess/multi_batch_copy_graph.h @@ -18,6 +18,7 @@ #include #include #include +#include #include "external/ge/ge_api_error_codes.h" @@ -64,12 +65,26 @@ class MultiBatchGraphCopyer { private: Status Init(); Status CheckArguments(); + Status RelinkConstCtrlEdge(); + + Status ExtractUnchangedStructureOutofCycle(); + Status GetEnterNodesGroupByFrame(std::map> &frame_enter); + Status GetNodeNeedExtract(const std::map> &frame_enter, + std::queue &nodes_to_extract); + bool AllInDataNodesUnchangeAndNoMergeOut(const NodePtr &node); + Status MoveInEntersInDataAnchorDown(NodePtr &node, OpDescPtr &enter_desc); + Status InsertEnterAfterNode(NodePtr &node, const OpDescPtr &enter_desc, std::set &out_nodes); + Status MoveCtrlEdgeToOutNodes(NodePtr &node, std::set &out_nodes); + Status DeleteEnterWithoutDataOut(); // label status for origin_all_nodes_ Status LabelStatus(); Status LabelInBatchBranchStatus(); void LabelStatusForData(const NodePtr &data); void LabelStatusForGetNextSink(const NodePtr &data); + void InitStatus(std::map> &frame_enters); + void ResetEnterStatus(std::map> &frame_enters, const NodePtr &node); + // add nodes functions Status CreateNewNodes(); @@ -81,7 +96,6 @@ class MultiBatchGraphCopyer { Status InsertSwitchNForData(const NodePtr &node, const size_t &out_anchor_index, const size_t &peer_in_anchor_index, std::vector> &dynamic_out_to_switchn); - Status InsertIdentityAfterSwitchN(); Status UpdateMaxShapeToData(const NodePtr &node, size_t out_anchor_index); Status UpdateShapeOfShapeNode(const NodePtr &node, size_t out_anchor_index); diff --git a/ge/host_kernels/dynamic_stitch_kernel.cc b/ge/host_kernels/dynamic_stitch_kernel.cc index d26237f4..32611b03 100644 --- a/ge/host_kernels/dynamic_stitch_kernel.cc +++ b/ge/host_kernels/dynamic_stitch_kernel.cc @@ -33,6 +33,8 @@ namespace { const int kDoubleAttrN = 2; const int kFirstOutputDescIdx = 0; const int kMergedShapeSecondDim = 1; +const size_t kNullTensorDimNum = 1; +const int64_t kNullTensorDimValue = 0; const std::set kSupportedTypeSet = {DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE}; } // namespace @@ -177,7 +179,14 @@ Status DynamicStitchKernel::StitchDataFollowIndices(int64_t data_unit, const vec int64_t src_offset = 0; std::set indices_set; for (int i = 0; i < n_; i++) { - auto indices_shape_size = input[i]->GetTensorDesc().GetShape().GetShapeSize(); + GeShape indices_shape = input[i]->GetTensorDesc().GetShape(); + size_t indices_dim_num = indices_shape.GetDimNum(); + // skip null indices tensor + if (indices_dim_num == kNullTensorDimNum && indices_shape.GetDim(0) == kNullTensorDimValue) { + GELOGD("Input indices[%d] has null tensor, skip it.", i); + continue; + } + auto indices_shape_size = indices_shape.GetShapeSize(); // to normalize logic, assume scalar as vector with shape of [1]. indices_shape_size = (indices_shape_size == 0) ? 1 : indices_shape_size; // all index for input is less than size of input diff --git a/ge/hybrid/executor/hybrid_execution_context.h b/ge/hybrid/executor/hybrid_execution_context.h index 1fe40c77..f1c25290 100644 --- a/ge/hybrid/executor/hybrid_execution_context.h +++ b/ge/hybrid/executor/hybrid_execution_context.h @@ -22,6 +22,7 @@ #include "common/blocking_queue.h" #include "common/properties_manager.h" #include "framework/common/debug/ge_log.h" +#include "graph/ge_local_context.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/common/tensor_value.h" #include "hybrid/executor/hybrid_profiler.h" @@ -38,6 +39,7 @@ struct GraphExecutionContext { uint64_t session_id = 0; const HybridModel *model = nullptr; + const GEThreadLocalContext *ge_context = nullptr; rtStream_t stream = nullptr; rtContext_t rt_context = nullptr; rtContext_t rt_gen_context = nullptr; diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index 8ba687c2..e17998db 100644 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -95,6 +95,7 @@ Status HybridModelExecutor::InitExecutionContext() { context_.stream = stream_; context_.model = model_; context_.session_id = ::ge::GetContext().SessionId(); + context_.ge_context = &GetThreadLocalContext(); GELOGD("session id from model = %lu, from context = %lu", model_->GetSessionId(), context_.session_id); context_.allocator = NpuMemoryAllocator::GetAllocator(device_id_); GE_CHECK_NOTNULL(context_.allocator); diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc index ceed40b0..033c5304 100644 --- a/ge/hybrid/executor/node_state.cc +++ b/ge/hybrid/executor/node_state.cc @@ -18,7 +18,6 @@ #include #include "framework/common/debug/log.h" #include "graph/compute_graph.h" -#include "graph/utils/tensor_utils.h" #include "hybrid_execution_context.h" #include "subgraph_context.h" @@ -36,31 +35,29 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item( this->num_pending_shapes_); } -Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target) { +Status ShapeInferenceState::UpdateInputShape(int idx, + const GeShape &ori_shape, + const GeShape &shape) { if (node_item.IsInputShapeStatic(idx)) { GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]", node_item.NodeName().c_str(), idx, node_item.MutableInputDesc(idx)->GetShape().ToString().c_str(), - target.GetShape().ToString().c_str()); + shape.ToString().c_str()); return SUCCESS; } - int64_t tensor_size = -1; - (void) TensorUtils::GetSize(target, tensor_size); - GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s], size = %ld", + GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s]", node_item.NodeName().c_str(), idx, - target.GetShape().ToString().c_str(), - target.GetOriginShape().ToString().c_str(), - tensor_size); + shape.ToString().c_str(), + ori_shape.ToString().c_str()); std::lock_guard lk(mu_); auto tensor_desc = node_item.MutableInputDesc(idx); GE_CHECK_NOTNULL(tensor_desc); - tensor_desc->SetShape(target.GetShape()); - tensor_desc->SetOriginShape(target.GetOriginShape()); - (void) TensorUtils::SetSize(*tensor_desc, tensor_size); + tensor_desc->SetShape(shape); + tensor_desc->SetOriginShape(ori_shape); if (--num_pending_shapes_ == 0) { ready_cv_.notify_all(); } @@ -113,24 +110,24 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex for (auto &p : shape_futures) { auto idx = p.first; auto &future = p.second; + GeShape shape; + GeShape ori_shape; RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx); - auto src_tensor_desc = future.GetTensorDesc(); - GE_CHECK_NOTNULL(src_tensor_desc); + GE_CHK_STATUS_RET(future.Get(ori_shape, shape), + "[%s] Get shape failed. index = %u", + node_item.NodeName().c_str(), + idx); RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx); - auto input_desc = node_item.MutableInputDesc(idx); - GE_CHECK_NOTNULL(input_desc); - int64_t tensor_size = -1; - (void) TensorUtils::GetSize(*src_tensor_desc, tensor_size); - GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], index = %zu", + GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]", node_item.NodeName().c_str(), idx, - src_tensor_desc->GetShape().ToString().c_str(), - src_tensor_desc->GetOriginShape().ToString().c_str(), - tensor_size); - input_desc->SetShape(src_tensor_desc->GetShape()); - input_desc->SetOriginShape(src_tensor_desc->GetOriginShape()); - (void) TensorUtils::SetSize(*input_desc, tensor_size); + shape.ToString().c_str(), + ori_shape.ToString().c_str()); + auto input_desc = node_item.MutableInputDesc(idx); + GE_CHECK_NOTNULL(input_desc); + input_desc->SetShape(std::move(shape)); + input_desc->SetOriginShape(ori_shape); } return SUCCESS; @@ -193,14 +190,5 @@ Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) { GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str()); return SUCCESS; } - -GeTensorDescPtr ShapeFuture::GetTensorDesc() { - GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str()); - if (!subgraph_context_->Await(src_node_)) { - GELOGE(INTERNAL_ERROR, "cancelled"); - return nullptr; - } - return src_node_->GetOpDesc()->MutableOutputDesc(src_index_); -} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 312e177f..04f1ee4b 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -35,7 +35,6 @@ class ShapeFuture { ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context); ~ShapeFuture() = default; Status Get(GeShape &ori_shape, GeShape &shape); - GeTensorDescPtr GetTensorDesc(); private: NodePtr src_node_; @@ -46,7 +45,7 @@ class ShapeFuture { struct ShapeInferenceState { explicit ShapeInferenceState(const NodeItem &node_item); - Status UpdateInputShape(int idx, const GeTensorDesc &tensor_desc); + Status UpdateInputShape(int idx, const GeShape &ori_shape, const GeShape &shape); void UpdateInputShapeFuture(int idx, ShapeFuture &&future); diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 4b6dddab..5a464f8e 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -96,7 +96,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vectorGetOrCreateNodeState(input_node); GE_CHECK_NOTNULL(node_state); - node_state->GetShapeInferenceState().UpdateInputShape(0, *tensor_desc); + node_state->GetShapeInferenceState().UpdateInputShape(0, tensor_desc->GetOriginShape(), tensor_desc->GetShape()); } } @@ -268,6 +268,13 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta } else { node_state.SetKernelTask(node_item.kernel_task); } + + GELOGD("[%s] Start to invoke CalcOpRunningParam.", node_item.NodeName().c_str()); + RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start"); + GE_CHK_STATUS_RET(NodeExecutorManager::GetInstance().CalcOpRunningParam(*node_item.node), + "[%s] Failed to invoke CalcOpRunningParam.", node_item.NodeName().c_str()); + RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] End"); + GELOGD("[%s] Done invoking CalcOpRunningParam successfully.", node_item.NodeName().c_str()); return SUCCESS; } diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index 819454db..b984eec3 100644 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -20,9 +20,12 @@ #include "graph/utils/tensor_adapter.h" #include "graph/debug/ge_attr_define.h" #include "hybrid/node_executor/node_executor.h" -#include "hybrid/executor//worker//shape_inference_engine.h" +#include "common/dump/dump_manager.h" #include "common/dump/dump_op.h" +#include "common/types.h" +#include "common/ge_types.h" #include "common/profiling/profiling_manager.h" +#include "runtime/base.h" namespace ge { namespace hybrid { @@ -345,10 +348,6 @@ Status NodeDoneCallback::OnNodeDone() { } GE_CHK_STATUS_RET_NOLOG(PrepareConstInputs(node_item)); - if (node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE) { - // update output tensor sizes - GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(node_item)); - } // PropagateOutputs for type == DEPEND_COMPUTE if (node_item.shape_inference_type == DEPEND_COMPUTE) { if (graph_context_->trace_enabled) { diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index 66d0ede2..1d813526 100644 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -17,15 +17,9 @@ #include "hybrid/executor/worker/shape_inference_engine.h" #include "graph/shape_refiner.h" #include "graph/utils/node_utils.h" -#include "graph/utils/tensor_utils.h" -#include "graph/utils/type_utils.h" -#include "common/math/math_util.h" #include "hybrid/node_executor/node_executor.h" namespace ge { -namespace { -const int kAlignment = 32; -} namespace hybrid { ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context) : execution_context_(execution_context), @@ -46,9 +40,7 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { } if (node_item.fused_subgraph != nullptr) { - GE_CHK_STATUS_RET_NOLOG(InferShapeForSubgraph(node_item, *node_item.fused_subgraph)); - GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item)); - return SUCCESS; + return InferShapeForSubgraph(node_item, *node_item.fused_subgraph); } // Skip shape inference for node of type DEPEND_COMPUTE @@ -71,15 +63,21 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { std::lock_guard lk(mu_); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), - "Invoke InferShapeAndType failed."); + "Invoke InferShapeAndType failed."); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End"); } + // Check again to make sure shape is valid after shape inference + if (node_item.shape_inference_type != DEPEND_SHAPE_RANGE) { + bool is_unknown_shape = false; + GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node_item.node, is_unknown_shape), + "Failed to get shape status. node = %s", + node_item.NodeName().c_str()); - // update output tensor sizes after shape inference - // error if shape is still unknown and not of type DEPEND_SHAPE_RANGE - RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start"); - GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item, node_item.shape_inference_type == DEPEND_SHAPE_RANGE)); - RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] End"); + GE_CHK_BOOL_RET_STATUS(!is_unknown_shape, + INTERNAL_ERROR, + "[%s] Shape is still unknown after shape inference.", + node_item.NodeName().c_str()); + } GELOGD("[%s] [HybridTrace] After shape inference. Node = %s", node_item.NodeName().c_str(), @@ -129,6 +127,8 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { // propagate each output for (int i = 0; i < node_item.num_outputs; ++i) { auto output_desc = node_item.op_desc->MutableOutputDesc(i); + const auto &shape = output_desc->MutableShape(); + const auto &ori_shape = output_desc->GetOriginShape(); auto &output_nodes = node_item.outputs[i]; // propagate output to all sub-inputs @@ -149,7 +149,9 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { infer_state.UpdateInputShapeFuture(dst_input_index_and_node.first, std::move(future)); } else { - GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, *output_desc)); + GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, + ori_shape, + shape)); } } } @@ -228,92 +230,5 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) { } return SUCCESS; } - -Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc, - std::vector &shape, - bool fallback_with_range) { - const auto &tensor_shape = tensor_desc.MutableShape(); - if (tensor_shape.IsUnknownShape()) { - if (!fallback_with_range) { - GELOGE(INTERNAL_ERROR, "Output shape is still unknown after shape inference. shape = [%s]", - tensor_shape.ToString().c_str()); - return INTERNAL_ERROR; - } - - GELOGD("Calc output size by range"); - std::vector> shape_range; - GE_CHK_GRAPH_STATUS_RET(tensor_desc.GetShapeRange(shape_range), "Failed to get shape range"); - if (shape_range.size() != shape.size()) { - GELOGE(INTERNAL_ERROR, "Number of shape ranges (%zu) mismatches that of dims (%zu)", - shape_range.size(), - shape.size()); - return INTERNAL_ERROR; - } - - for (size_t dim_index = 0; dim_index < shape.size(); ++dim_index) { - if (shape[dim_index] == ge::UNKNOWN_DIM) { - shape[dim_index] = shape_range[dim_index].second; - } - } - - GELOGD("After canonicalization, shape = [%s], before = [%s]", - GeShape(shape).ToString().c_str(), - tensor_shape.ToString().c_str()); - } - - return SUCCESS; -} - -Status ShapeInferenceEngine::CalcTensorSize(DataType data_type, - const std::vector &shape, - int64_t &tensor_size) { - GELOGD("To calc tensor size by shape = [%s]", GeShape(shape).ToString().c_str()); - uint32_t type_size; - if (!TypeUtils::GetDataTypeLength(data_type, type_size)) { - GELOGE(INTERNAL_ERROR, "Failed to get data type size"); - return INTERNAL_ERROR; - } - - tensor_size = type_size; - for (const auto &dim : shape) { - GE_CHECK_GE(dim, 0); - GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim), - "Shape size overflow, shape = [%s]", - GeShape(shape).ToString().c_str()); - tensor_size *= dim; - } - - GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1), - "Tensor size is too large: %ld, shape = [%s]", - tensor_size, - GeShape(shape).ToString().c_str()); - tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment; - return SUCCESS; -} - -Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range) { - auto op_desc = node_item.GetOpDesc(); - for (size_t output_index = 0; output_index < op_desc->GetOutputsSize(); ++output_index) { - auto tensor_desc = op_desc->MutableOutputDesc(output_index); - GE_CHECK_NOTNULL(tensor_desc); - const auto &shape = tensor_desc->MutableShape(); - // modify on copy - auto dims = shape.GetDims(); - GE_CHK_STATUS_RET(CanonicalizeShape(*tensor_desc, dims, fallback_with_range), - "[%s] Failed to canonicalize shape for output %zu", - node_item.NodeName().c_str(), - output_index); - - int64_t tensor_size; - GE_CHK_STATUS_RET(CalcTensorSize(tensor_desc->GetDataType(), dims, tensor_size), - "[%s] Failed to calc tensor size for output %zu", - node_item.NodeName().c_str(), - output_index); - GELOGD("[%s] Tensor size of output %zu = %ld", node_item.NodeName().c_str(), output_index, tensor_size); - (void) TensorUtils::SetSize(*tensor_desc, tensor_size); - } - - return SUCCESS; -} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/worker/shape_inference_engine.h b/ge/hybrid/executor/worker/shape_inference_engine.h index b946577f..7bb9269c 100644 --- a/ge/hybrid/executor/worker/shape_inference_engine.h +++ b/ge/hybrid/executor/worker/shape_inference_engine.h @@ -34,11 +34,7 @@ class ShapeInferenceEngine { Status PropagateOutputShapes(const NodeItem &node_item); - static Status CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range = false); - private: - static Status CanonicalizeShape(GeTensorDesc &tensor_desc, std::vector &shape, bool fallback_with_range); - static Status CalcTensorSize(DataType data_type, const std::vector &shape, int64_t &tensor_size); static Status UpdatePeerNodeShape(const Node &node); Status AwaitDependentNodes(NodeState &node_state); diff --git a/ge/hybrid/executor/worker/task_compile_engine.cc b/ge/hybrid/executor/worker/task_compile_engine.cc index e2e94f66..f80374c6 100644 --- a/ge/hybrid/executor/worker/task_compile_engine.cc +++ b/ge/hybrid/executor/worker/task_compile_engine.cc @@ -26,6 +26,9 @@ Status TaskCompileEngine::Compile(NodeState &node_state, GraphExecutionContext * RECORD_COMPILE_EVENT(context, node_item.NodeName().c_str(), "[Compile] Start"); GE_CHK_RT_RET(rtCtxSetCurrent(context->rt_gen_context)); + if (context->ge_context != nullptr) { + GetThreadLocalContext() = *context->ge_context; + } shared_ptr kernel_task; auto ret = node_item.node_executor->CompileTask(*context->model, node_item.node, kernel_task); RECORD_COMPILE_EVENT(context, node_state.GetName().c_str(), "[Compile] End"); diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index d519c35b..b9b8e6d0 100644 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -21,6 +21,7 @@ #include "graph/build/memory/var_mem_assign_util.h" #include "graph/debug/ge_attr_define.h" #include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/new_model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" @@ -921,6 +922,7 @@ Status HybridModelBuilder::InitWeights() { } Status HybridModelBuilder::LoadTasks() { + GE_CHK_STATUS_RET(CheckAicpuOpList(), "Check Aicpu op failed."); for (auto &it : hybrid_model_.node_items_) { auto &node_item = it.second; auto &node_ptr = node_item->node; @@ -1557,5 +1559,29 @@ Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item, return SUCCESS; } + +Status HybridModelBuilder::CheckAicpuOpList() { + std::vector aicpu_optype_list; + std::vector aicpu_tf_optype_list; + std::set aicpu_optype_set; + std::set aicpu_tf_optype_set; + for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) { + auto &ge_model = it.second; + GE_CHECK_NOTNULL(ge_model); + if (ge::AttrUtils::GetListStr(*ge_model, "needCheckCpu", aicpu_optype_list)) { + aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end()); + } + + if (ge::AttrUtils::GetListStr(*ge_model, "needCheckTf", aicpu_tf_optype_list)) { + aicpu_tf_optype_set.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end()); + } + } + // reset list with set + aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end()); + aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end()); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), + "Launch check aicpu op type failed."); + return SUCCESS; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h index b90ec982..bb349d86 100644 --- a/ge/hybrid/model/hybrid_model_builder.h +++ b/ge/hybrid/model/hybrid_model_builder.h @@ -78,6 +78,7 @@ class HybridModelBuilder { Status ParseVarOutputs(NodeItem &node_item); Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item); Status RecoverGraphUnknownFlag(); + Status CheckAicpuOpList(); const char* GetGraphName() const { return hybrid_model_.model_name_.c_str(); diff --git a/ge/hybrid/model/node_item.cc b/ge/hybrid/model/node_item.cc index eb00f509..69cf334d 100644 --- a/ge/hybrid/model/node_item.cc +++ b/ge/hybrid/model/node_item.cc @@ -22,7 +22,6 @@ #include "graph/debug/ge_attr_define.h" #include "graph/utils/node_utils.h" #include "hybrid/node_executor/node_executor.h" -#include "hybrid/executor/worker/shape_inference_engine.h" namespace ge { namespace hybrid { @@ -48,7 +47,7 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr GE_CHECK_NOTNULL(dst_op_desc); auto in_idx = node_and_anchor.second->GetIdx(); auto tensor_desc = dst_op_desc->MutableInputDesc(in_idx); - fused_subgraph.input_mapping[static_cast(parent_index)].emplace_back(tensor_desc); + fused_subgraph.input_mapping[parent_index].emplace_back(tensor_desc); GELOGD("Input[%u] mapped to [%s:%u]", parent_index, dst_op_desc->GetName().c_str(), in_idx); } @@ -65,7 +64,7 @@ Status ParseOutputMapping(const OpDescPtr &op_desc, FusedSubgraph &fused_subgrap return FAILED; } - fused_subgraph.output_mapping.emplace(static_cast(parent_index), op_desc); + fused_subgraph.output_mapping.emplace(parent_index, op_desc); return SUCCESS; } @@ -127,7 +126,12 @@ Status NodeItem::Create(const NodePtr &node, std::unique_ptr &node_ite return SUCCESS; } -void NodeItem::ResolveOptionalInputs() { +Status NodeItem::Init() { + GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX); + GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX); + num_inputs = static_cast(op_desc->GetInputsSize()); + num_outputs = static_cast(op_desc->GetOutputsSize()); + if (op_desc->GetAllInputsSize() != op_desc->GetInputsSize()) { has_optional_inputs = true; for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { @@ -139,18 +143,7 @@ void NodeItem::ResolveOptionalInputs() { } } } -} -Status NodeItem::InitInputsAndOutputs() { - GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX); - GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX); - num_inputs = static_cast(op_desc->GetInputsSize()); - num_outputs = static_cast(op_desc->GetOutputsSize()); - ResolveOptionalInputs(); - return SUCCESS; -} - -Status NodeItem::ResolveDynamicState() { (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic); if (!is_dynamic) { @@ -158,54 +151,38 @@ Status NodeItem::ResolveDynamicState() { "[%s] Failed to get shape status.", node->GetName().c_str()); } - return SUCCESS; -} -Status NodeItem::ResolveStaticInputsAndOutputs() { - for (int i = 0; i < num_inputs; ++i) { - const auto &input_desc = MutableInputDesc(i); - GE_CHECK_NOTNULL(input_desc); - if (input_desc->MutableShape().IsUnknownShape()) { - is_input_shape_static_.push_back(false); - } else { - num_static_input_shapes++; - is_input_shape_static_.push_back(true); - GELOGD("[%s] The shape of input[%d] is static. shape = [%s]", - NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str()); + if (is_dynamic) { + for (int i = 0; i < num_inputs; ++i) { + const auto &input_desc = MutableInputDesc(i); + GE_CHECK_NOTNULL(input_desc); + if (input_desc->MutableShape().IsUnknownShape()) { + is_input_shape_static_.push_back(false); + } else { + num_static_input_shapes++; + is_input_shape_static_.push_back(true); + GELOGD("[%s] The shape of input[%d] is static. shape = [%s]", + NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str()); + } } - } - for (int i = 0; i < num_outputs; ++i) { - const auto &output_desc = op_desc->MutableOutputDesc(i); - GE_CHECK_NOTNULL(output_desc); - if (output_desc->MutableShape().IsUnknownShape()) { - is_output_shape_static = false; - break; + for (int i = 0; i < num_outputs; ++i) { + const auto &output_desc = op_desc->MutableOutputDesc(i); + GE_CHECK_NOTNULL(output_desc); + if (output_desc->MutableShape().IsUnknownShape()) { + is_output_shape_static = false; + break; + } } - } - - if (is_output_shape_static) { - GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this)); - } - return SUCCESS; -} -void NodeItem::ResolveUnknownShapeType() { - if (IsControlOp() || node_type == PARTITIONEDCALL) { - shape_inference_type = DEPEND_COMPUTE; - } else { - int32_t unknown_shape_type_val = 0; - (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); - shape_inference_type = static_cast(unknown_shape_type_val); - } -} + if (IsControlOp() || node_type == PARTITIONEDCALL) { + shape_inference_type = DEPEND_COMPUTE; + } else { + int32_t unknown_shape_type_val = 0; + (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); + shape_inference_type = static_cast(unknown_shape_type_val); + } -Status NodeItem::Init() { - GE_CHK_STATUS_RET_NOLOG(InitInputsAndOutputs()); - GE_CHK_STATUS_RET_NOLOG(ResolveDynamicState()); - if (is_dynamic) { - ResolveUnknownShapeType(); - GE_CHK_STATUS_RET_NOLOG(ResolveStaticInputsAndOutputs()); GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str()); } diff --git a/ge/hybrid/model/node_item.h b/ge/hybrid/model/node_item.h index 99f0d83c..8fbdc648 100644 --- a/ge/hybrid/model/node_item.h +++ b/ge/hybrid/model/node_item.h @@ -103,11 +103,6 @@ struct NodeItem { private: explicit NodeItem(NodePtr node); Status Init(); - Status InitInputsAndOutputs(); - void ResolveOptionalInputs(); - Status ResolveDynamicState(); - Status ResolveStaticInputsAndOutputs(); - void ResolveUnknownShapeType(); std::vector is_input_shape_static_; std::vector input_desc_indices_; diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index f16bfb2f..77004f99 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -148,10 +148,6 @@ Status TaskContext::AllocateWorkspaces() { } Status TaskContext::RegisterCallback(const std::function &callback_fun) const { - if (callback_fun == nullptr) { - GELOGW("[%s] Callback is NULL", GetNodeName()); - return SUCCESS; - } auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun); if (ret != SUCCESS) { GELOGE(ret, "[%s] Failed to register callback", GetNodeName()); @@ -388,20 +384,6 @@ const char *TaskContext::GetNodeName() const { return node_item_->NodeName().c_str(); } -void TaskContext::ReleaseInputsAndOutputs() { - for (int i = 0; i < node_item_->num_inputs; ++i) { - auto tensor = inputs_start_ + i; - tensor->Destroy(); - GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), i); - } - - for (int i = 0; i < node_item_->num_outputs; ++i) { - auto tensor = outputs_start_ + i; - tensor->Destroy(); - GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), i); - } -} - void TaskContext::ReleaseInput(int index) { auto input_tensor = MutableInput(index); if (input_tensor != nullptr) { @@ -474,9 +456,5 @@ Status TaskContext::TryExecuteCallback(const function &callback_fun) con const DumpProperties &TaskContext::GetDumpProperties() const { return execution_context_->dump_properties; } - -bool TaskContext::NeedCallback() { - return node_item_->has_observer || IsDumpEnabled() || execution_context_->profiling_level > 0; -} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index 34754a14..0549a1dc 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -50,8 +50,6 @@ class TaskContext { ConstGeTensorDescPtr GetOutputDesc(int index) const; GeTensorDescPtr MutableInputDesc(int index) const; GeTensorDescPtr MutableOutputDesc(int index) const; - void ReleaseInputsAndOutputs(); - bool NeedCallback(); void ReleaseInput(int index); const TensorValue *GetInput(int index) const; const TensorValue *GetOutput(int index) const; diff --git a/ge/ir_build/atc_ir_common.cc b/ge/ir_build/atc_ir_common.cc index 77d749de..7d63514c 100644 --- a/ge/ir_build/atc_ir_common.cc +++ b/ge/ir_build/atc_ir_common.cc @@ -63,19 +63,18 @@ vector SplitInputShape(const std::string &input_shape) { } } // namespace -Status CheckInputFormat(const string &input_format) { +Status CheckInputFormat(const std::string &input_format) { if (input_format.empty()) { return ge::SUCCESS; } if (!ge::TypeUtils::IsFormatValid(input_format.c_str())) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, {"--input_format", input_format, "input format is invalid!"}); - GELOGE(ge::PARAM_INVALID, "input format [%s] is invalid!", input_format.c_str()); + "E10001", {"parameter", "value", "reason"}, {"--input_format", input_format, "input format not found"}); + GELOGE(ge::PARAM_INVALID, "user input format [%s] is not found!", input_format.c_str()); return ge::PARAM_INVALID; } return ge::SUCCESS; } - bool CheckDynamicBatchSizeInputShapeValid(unordered_map> shape_map, std::string &dynamic_batch_size) { int32_t size = 0; diff --git a/ge/ir_build/atc_ir_common.h b/ge/ir_build/atc_ir_common.h index b26c2f2b..1e4af73e 100644 --- a/ge/ir_build/atc_ir_common.h +++ b/ge/ir_build/atc_ir_common.h @@ -75,7 +75,7 @@ Status CheckInsertOpConfParamValid(const std::string insert_op_conf); Status CheckDisableReuseMemoryParamValid(const std::string disable_reuse_memory); Status CheckEnableSingleStreamParamValid(const std::string enable_single_stream); Status CheckImplmodeParamValid(const std::string &optypelist_for_implmode, std::string &op_select_implmode); -Status CheckInputFormat(const string &input_format); +Status CheckInputFormat(const std::string &input_format); void PrintOptionMap(std::map &options, std::string tips); void EraseEndSemicolon(std::string ¶m); } diff --git a/ge/offline/main.cc b/ge/offline/main.cc index b7188a85..46d25d97 100644 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -305,7 +305,7 @@ class GFlagUtils { " --debug_dir Set the save path of operator compilation intermediate files.\n" "Default value: ./kernel_meta\n" " --op_compiler_cache_dir Set the save path of operator compilation cache files.\n" - "Default value: $HOME/atc_data/kernel_cache\n" + "Default value: $HOME/atc_data\n" " --op_compiler_cache_mode Set the operator compilation cache mode." "Options are disable(default), enable and force(force to refresh the cache)"); diff --git a/ge/proto/caffe/caffe.proto b/ge/proto/caffe/caffe.proto index 3f45aae2..20615fed 100644 --- a/ge/proto/caffe/caffe.proto +++ b/ge/proto/caffe/caffe.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software caffe, version 1.0 https://github.com/BVLC/caffe + * + * This file is included by GraphEngine so as to support model format conversion from caffe model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto2"; package domi.caffe; diff --git a/ge/proto/op_mapping_info.proto b/ge/proto/op_mapping_info.proto index 7fb6f84b..e23b7ebe 100644 --- a/ge/proto/op_mapping_info.proto +++ b/ge/proto/op_mapping_info.proto @@ -15,7 +15,6 @@ message Output { int32 original_output_data_type = 7; int32 original_output_format = 8; uint64 size = 9; - Shape origin_shape = 10; } message Input { @@ -24,7 +23,6 @@ message Input { Shape shape = 3; uint64 address = 4; uint64 size = 5; - Shape origin_shape = 6; } enum BufferType { diff --git a/ge/proto/tensorflow/attr_value.proto b/ge/proto/tensorflow/attr_value.proto index 1cc67d62..438d7163 100644 --- a/ge/proto/tensorflow/attr_value.proto +++ b/ge/proto/tensorflow/attr_value.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/function.proto b/ge/proto/tensorflow/function.proto index 075897c6..44681e32 100644 --- a/ge/proto/tensorflow/function.proto +++ b/ge/proto/tensorflow/function.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/graph.proto b/ge/proto/tensorflow/graph.proto index d639a7d6..73bfc6ee 100644 --- a/ge/proto/tensorflow/graph.proto +++ b/ge/proto/tensorflow/graph.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/graph_library.proto b/ge/proto/tensorflow/graph_library.proto index e393d38d..7bca0838 100644 --- a/ge/proto/tensorflow/graph_library.proto +++ b/ge/proto/tensorflow/graph_library.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/node_def.proto b/ge/proto/tensorflow/node_def.proto index b9bc97ee..50cf5cac 100644 --- a/ge/proto/tensorflow/node_def.proto +++ b/ge/proto/tensorflow/node_def.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/op_def.proto b/ge/proto/tensorflow/op_def.proto index 3485d045..7f0e8ce2 100644 --- a/ge/proto/tensorflow/op_def.proto +++ b/ge/proto/tensorflow/op_def.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/resource_handle.proto b/ge/proto/tensorflow/resource_handle.proto index a3452351..91c46c9a 100644 --- a/ge/proto/tensorflow/resource_handle.proto +++ b/ge/proto/tensorflow/resource_handle.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/tensor.proto b/ge/proto/tensorflow/tensor.proto index d0a4d024..48eeb6c4 100644 --- a/ge/proto/tensorflow/tensor.proto +++ b/ge/proto/tensorflow/tensor.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/tensor_shape.proto b/ge/proto/tensorflow/tensor_shape.proto index 4225a2e3..3a6d8c5a 100644 --- a/ge/proto/tensorflow/tensor_shape.proto +++ b/ge/proto/tensorflow/tensor_shape.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + // Protocol buffer representing the shape of tensors. syntax = "proto3"; diff --git a/ge/proto/tensorflow/types.proto b/ge/proto/tensorflow/types.proto index ba7a72b3..f40e49cb 100644 --- a/ge/proto/tensorflow/types.proto +++ b/ge/proto/tensorflow/types.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/proto/tensorflow/versions.proto b/ge/proto/tensorflow/versions.proto index 48061218..4e81548f 100644 --- a/ge/proto/tensorflow/versions.proto +++ b/ge/proto/tensorflow/versions.proto @@ -1,3 +1,11 @@ +/** + * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow + * + * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. + * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). + * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. + */ + syntax = "proto3"; package domi.tensorflow; diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index 20b1354d..51c3e845 100644 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -119,11 +119,11 @@ Status OpTask::DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_works uintptr_t *arg_base = nullptr; size_t arg_num = 0; GetIoAddr(arg_base, arg_num); - if (arg_num != all_addresses.size()) { - GELOGE(INTERNAL_ERROR, "[%s] arg number mismatches, expect = %zu, but got = %zu", + if (arg_num < all_addresses.size()) { + GELOGE(INTERNAL_ERROR, "[%s] arg number mismatches, expect at least = %zu, but got = %zu", op_desc_->GetName().c_str(), - arg_num, - all_addresses.size()); + all_addresses.size(), + arg_num); return INTERNAL_ERROR; } diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h index e3ec3a3c..d2373525 100644 --- a/inc/external/acl/error_codes/rt_error_codes.h +++ b/inc/external/acl/error_codes/rt_error_codes.h @@ -1,101 +1,101 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ -#define __INC_EXTERNEL_RT_ERROR_CODES_H__ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -static const int32_t ACL_RT_SUCCESS = 0; // success - -static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid -static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id -static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null -static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context -static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context -static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal -static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned -static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed -static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed -static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream -static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread -static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set -static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create -static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream -static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type -static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle -static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type - -static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support -static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error -static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error -static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow -static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device -static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail -static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission -static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource -static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource -static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource -static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource - -static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error -static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error -static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream -static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream -static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete -static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence -static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete -static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error -static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error -static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support -static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat -static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed -static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout -static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error -static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout -static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception -static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception -static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout -static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception -static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error -static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error -static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error -static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error -static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal -static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering -static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init -static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data -static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error -static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate -static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed -static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed -static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context -static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out -static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error - -static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error - -#ifdef __cplusplus -} -#endif - -#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ +#define __INC_EXTERNEL_RT_ERROR_CODES_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static const int32_t ACL_RT_SUCCESS = 0; // success + +static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid +static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id +static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null +static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context +static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context +static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal +static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned +static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed +static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed +static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream +static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread +static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set +static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create +static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream +static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type +static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle +static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type + +static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support +static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error +static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error +static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow +static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device +static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail +static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission +static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource +static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource +static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource +static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource + +static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error +static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error +static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream +static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream +static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete +static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence +static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete +static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error +static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error +static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support +static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat +static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed +static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout +static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error +static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout +static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception +static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception +static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout +static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception +static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error +static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error +static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error +static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error +static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal +static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering +static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init +static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data +static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error +static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate +static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed +static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed +static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context +static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out +static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error + +static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error + +#ifdef __cplusplus +} +#endif + +#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ diff --git a/inc/external/acl/ops/acl_fv.h b/inc/external/acl/ops/acl_fv.h index 27dc367a..4dc518c5 100644 --- a/inc/external/acl/ops/acl_fv.h +++ b/inc/external/acl/ops/acl_fv.h @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ #define INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 9b361b96..f2d083ae 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -293,7 +293,6 @@ const std::string MDL_BANK_PATH_FLAG = "ge.mdl_bank_path"; // Configure op bank path const std::string OP_BANK_PATH_FLAG = "ge.op_bank_path"; -const std::string OP_BANK_UPDATE_FLAG = "ge.op_bank_update"; // Graph run mode enum GraphRunMode { PREDICTION = 0, TRAIN }; @@ -367,7 +366,6 @@ static const char *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR; static const char *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE; static const char *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str(); static const char *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str(); -static const char *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str(); static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); // for interface: aclgrphBuildModel @@ -391,8 +389,7 @@ const std::set ir_builder_suppported_options = {INPUT_FORMAT, OP_COMPILER_CACHE_DIR, OP_COMPILER_CACHE_MODE, MDL_BANK_PATH, - OP_BANK_PATH, - OP_BANK_UPDATE}; + OP_BANK_PATH}; // for interface: aclgrphParse const std::set ir_parser_suppported_options = { diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h index e3ec3a3c..34202314 100644 --- a/inc/external/runtime/rt_error_codes.h +++ b/inc/external/runtime/rt_error_codes.h @@ -1,101 +1,95 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ -#define __INC_EXTERNEL_RT_ERROR_CODES_H__ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -static const int32_t ACL_RT_SUCCESS = 0; // success - -static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid -static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id -static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null -static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context -static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context -static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal -static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned -static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed -static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed -static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream -static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread -static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set -static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create -static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream -static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type -static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle -static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type - -static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support -static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error -static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error -static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow -static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device -static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail -static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission -static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource -static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource -static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource -static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource - -static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error -static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error -static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream -static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream -static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete -static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence -static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete -static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error -static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error -static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support -static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat -static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed -static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout -static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error -static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout -static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception -static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception -static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout -static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception -static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error -static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error -static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error -static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error -static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal -static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering -static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init -static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data -static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error -static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate -static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed -static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed -static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context -static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out -static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error - -static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error - -#ifdef __cplusplus -} -#endif - -#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ +/** + * @file rt_error_codes.h + * + * Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ +#define __INC_EXTERNEL_RT_ERROR_CODES_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static const int32_t ACL_RT_SUCCESS = 0; // success + +static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid +static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id +static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null +static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context +static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context +static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal +static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned +static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed +static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed +static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream +static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread +static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set +static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create +static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream +static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type +static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle +static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type + +static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support +static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error +static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error +static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow +static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device +static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail +static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission +static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource +static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource +static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource +static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource + +static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error +static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error +static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream +static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream +static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete +static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence +static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete +static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error +static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error +static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support +static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat +static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed +static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout +static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error +static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout +static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception +static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception +static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout +static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception +static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error +static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error +static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error +static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error +static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal +static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering +static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init +static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data +static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error +static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate +static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed +static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed +static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context +static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out +static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error + +static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error + +#ifdef __cplusplus +} +#endif + +#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index eea9824b..254dbada 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -37,9 +37,7 @@ enum FrameworkType { MINDSPORE = 1, TENSORFLOW = 3, ANDROID_NN, -#ifndef ONLY_COMPILE_OPEN_SRC ONNX, -#endif FRAMEWORK_RESERVED, }; diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 1b78860d..5a73126f 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -30,6 +30,8 @@ #include "runtime/base.h" namespace ge { +class ModelListenerAdapter; + class SingleOp; class DynamicSingleOp; @@ -53,8 +55,14 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { ge::Status Initialize(); ge::Status Finalize(); + // Load model + ge::Status LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key, int32_t priority, + std::shared_ptr listener); + ge::Status UnloadModel(uint32_t modelId); + ge::Status RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data); + // Get input and output descriptor ge::Status GetModelDescInfo(uint32_t model_id, std::vector &input_desc, std::vector &output_desc, bool new_model_desc = false); @@ -160,6 +168,9 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector &input_desc, std::vector &output_desc); + ge::Status LoadModel(uint32_t &model_id, const ge::ModelData &model_data, + std::shared_ptr listener); + ge::Status CommandHandle(const ge::Command &command); ge::Status SetDump(const DumpConfig &dump_config); @@ -286,6 +297,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { private: static bool isInit_; }; + +ge::Status ModelInfoParser(const ge::ModelData &model, ge::ModelInfo &model_info); } // namespace ge #endif // INC_FRAMEWORK_EXECUTOR_GE_EXECUTOR_H_ diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h index 57cff9a7..858d2578 100644 --- a/inc/framework/omg/parser/model_parser.h +++ b/inc/framework/omg/parser/model_parser.h @@ -57,7 +57,6 @@ class ModelParser { * @ingroup domi_omg * @brief Parse relevant data from memory and save it to graph * @param [in] input Model file memory data - * @param [in] input Model file memory size * @param [in|out] graph A graph for saving the model information after analysis * @return SUCCESS * @return FAILED @@ -65,7 +64,6 @@ class ModelParser { */ virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; -#ifndef ONLY_COMPILE_OPEN_SRC /** * @ingroup domi_omg * @brief Parse relevant data from memory and save it to graph @@ -77,7 +75,6 @@ class ModelParser { * @author */ virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0; -#endif /** * @ingroup domi_omg diff --git a/metadef/CMakeLists.txt b/metadef/CMakeLists.txt index 5ad573a6..965e36bd 100755 --- a/metadef/CMakeLists.txt +++ b/metadef/CMakeLists.txt @@ -33,9 +33,11 @@ if (ENABLE_OPEN_SRC) message(FATAL_ERROR "Running on a unsupported architecture: ${SYSTEM_TYPE}, build terminated") endif() set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH}) - find_module(slog libslog.so ${GE_LIB_PATH}) + find_module(slog libalog.so ${GE_LIB_PATH}) find_module(static_mmpa libmmpa.a ${GE_LIB_PATH}) find_module(error_manager liberror_manager.so ${GE_LIB_PATH}) + elseif(ENABLE_GE_COV OR ENABLE_GE_UT) + message(STATUS "Running on llt mode, no need to depend other component.") else() if(DEFINED ENV{ASCEND_CUSTOM_PATH}) set(ASCEND_DIR $ENV{ASCEND_CUSTOM_PATH}) @@ -45,7 +47,7 @@ if (ENABLE_OPEN_SRC) set(ASCEND_ATC_DIR ${ASCEND_DIR}/atc/lib64) - find_module(slog libslog.so ${ASCEND_ATC_DIR}) + find_module(slog libalog.so ${ASCEND_ATC_DIR}) find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) endif() diff --git a/metadef/OWNERS b/metadef/OWNERS index 8c42c84f..e97d0497 100644 --- a/metadef/OWNERS +++ b/metadef/OWNERS @@ -2,6 +2,8 @@ approvers: - ji_chen - wqtshg - ljl0711 +- startzgf168 +- lbisdaddy reviewers: - xchu42 - sheng-nan diff --git a/metadef/cmake/intf_pub_linux.cmake b/metadef/cmake/intf_pub_linux.cmake index b8f346cf..4173e362 100755 --- a/metadef/cmake/intf_pub_linux.cmake +++ b/metadef/cmake/intf_pub_linux.cmake @@ -4,7 +4,7 @@ endif() add_library(intf_pub INTERFACE) -target_compile_options(intf_pub INTERFACE +target_compile_options(intf_pub INTERFACE -Wall -fPIC $,-fstack-protector-all,-fstack-protector-strong> @@ -16,6 +16,7 @@ target_compile_definitions(intf_pub INTERFACE $<$:CFG_BUILD_DEBUG> WIN64=1 LINUX=0 + LOG_CPP ) target_link_options(intf_pub INTERFACE -Wl,-z,relro diff --git a/metadef/graph/option/ge_context.cc b/metadef/graph/option/ge_context.cc index ee9f4a1a..b4fe50c8 100644 --- a/metadef/graph/option/ge_context.cc +++ b/metadef/graph/option/ge_context.cc @@ -22,7 +22,7 @@ namespace ge { namespace { -const int64_t kMinTrainingTraceJobId = 256; +const int64_t kMinTrainingTraceJobId = 65536; const int kDecimal = 10; const char *kHostExecPlacement = "HOST"; } diff --git a/metadef/graph/proto/op_mapping_info.proto b/metadef/graph/proto/op_mapping_info.proto index 7fb6f84b..e23b7ebe 100644 --- a/metadef/graph/proto/op_mapping_info.proto +++ b/metadef/graph/proto/op_mapping_info.proto @@ -15,7 +15,6 @@ message Output { int32 original_output_data_type = 7; int32 original_output_format = 8; uint64 size = 9; - Shape origin_shape = 10; } message Input { @@ -24,7 +23,6 @@ message Input { Shape shape = 3; uint64 address = 4; uint64 size = 5; - Shape origin_shape = 6; } enum BufferType { diff --git a/metadef/inc/common/proto/op_mapping_info.proto b/metadef/inc/common/proto/op_mapping_info.proto index 7fb6f84b..e23b7ebe 100644 --- a/metadef/inc/common/proto/op_mapping_info.proto +++ b/metadef/inc/common/proto/op_mapping_info.proto @@ -15,7 +15,6 @@ message Output { int32 original_output_data_type = 7; int32 original_output_format = 8; uint64 size = 9; - Shape origin_shape = 10; } message Input { @@ -24,7 +23,6 @@ message Input { Shape shape = 3; uint64 address = 4; uint64 size = 5; - Shape origin_shape = 6; } enum BufferType { diff --git a/metadef/inc/common/util/error_manager/error_manager.h b/metadef/inc/common/util/error_manager/error_manager.h index 37c1f96c..d316dc54 100644 --- a/metadef/inc/common/util/error_manager/error_manager.h +++ b/metadef/inc/common/util/error_manager/error_manager.h @@ -74,17 +74,13 @@ class ErrorManager { const std::vector &value = {}); /// - /// @brief report graph compile failed message such as error code and op_name in mstune case + /// @brief report graph compile failed message such as error code and op_name in mustune case + /// @param [in] root_graph_name: root graph name /// @param [in] msg: failed message map, key is error code, value is op_name /// @return int 0(success) -1(fail) /// - int ReportMstuneCompileFailedMsg(const std::map &msg); - - /// - /// @brief save graph compile failed message from thread local map to global map - /// @param [in] graph_name: graph name - /// - void SaveMstuneCompileFailedMsg(const std::string &graph_name); + int ReportMstuneCompileFailedMsg(const std::string &root_graph_name, + const std::map &msg); /// /// @brief get graph compile failed message in mstune case @@ -92,8 +88,8 @@ class ErrorManager { /// @param [out] msg_map: failed message map, key is error code, value is op_name list /// @return int 0(success) -1(fail) /// - int GetMstuneCompileFailedMsg(const std::string &graph_name, - std::map> &msg_map); private: @@ -115,6 +111,9 @@ class ErrorManager { int ReadJsonFile(const std::string &file_path, void *handle); + void ClassifyMsg(const std::map &msg, + std::map> &classified_msg); + bool is_init_ = false; std::mutex mutex_; std::map error_map_; diff --git a/metadef/inc/common/util/platform_info.h b/metadef/inc/common/util/platform_info.h index af12fc85..ab80f830 100644 --- a/metadef/inc/common/util/platform_info.h +++ b/metadef/inc/common/util/platform_info.h @@ -19,8 +19,12 @@ #include #include +#include #include "platform_info_def.h" -#include "platform_infos_def.h" + +using std::map; +using std::vector; +using std::string; namespace fe { class PlatformInfoManager { @@ -32,143 +36,66 @@ class PlatformInfoManager { uint32_t InitializePlatformInfo(); uint32_t Finalize(); - uint32_t GetPlatformInfo(const std::string SoCVersion, - PlatformInfo &platform_info, - OptionalInfo &opti_compilation_info); + uint32_t GetPlatformInfo(const string SoCVersion, PlatformInfo &platform_info, OptionalInfo &opti_compilation_info); uint32_t GetPlatformInfoWithOutSocVersion(PlatformInfo &platform_info, OptionalInfo &opti_compilation_info); void SetOptionalCompilationInfo(OptionalInfo &opti_compilation_info); - uint32_t GetPlatformInfos(const std::string SoCVersion, - PlatFormInfos &platform_info, - OptionalInfos &opti_compilation_info); - - uint32_t GetPlatformInfoWithOutSocVersion(PlatFormInfos &platform_info, OptionalInfos &opti_compilation_info); - - void SetOptionalCompilationInfo(OptionalInfos &opti_compilation_info); - private: PlatformInfoManager(); ~PlatformInfoManager(); - uint32_t LoadIniFile(std::string ini_file_real_path); + uint32_t LoadIniFile(string ini_file_real_path); - void Trim(std::string &str); + void Trim(string &str); - uint32_t LoadConfigFile(std::string real_path); + uint32_t LoadConfigFile(string real_path); - std::string RealPath(const std::string &path); + string RealPath(const std::string &path); - std::string GetSoFilePath(); + string GetSoFilePath(); - void ParseVersion(std::map &version_map, - std::string &soc_version, - PlatformInfo &platform_info_temp); + void ParseVersion(map &version_map, string &soc_version, PlatformInfo &platform_info_temp); - void ParseSocInfo(std::map &soc_info_map, - PlatformInfo &platform_info_temp); + void ParseSocInfo(map &soc_info_map, PlatformInfo &platform_info_temp); - void ParseCubeOfAICoreSpec(std::map &ai_core_spec_map, - PlatformInfo &platform_info_temp); + void ParseCubeOfAICoreSpec(map &ai_core_spec_map, PlatformInfo &platform_info_temp); - void ParseBufferOfAICoreSpec(std::map &ai_core_spec_map, - PlatformInfo &platform_info_temp); + void ParseBufferOfAICoreSpec(map &ai_core_spec_map, PlatformInfo &platform_info_temp); - void ParseUBOfAICoreSpec(std::map &ai_core_spec_map, - PlatformInfo &platform_info_temp); + void ParseUBOfAICoreSpec(map &ai_core_spec_map, PlatformInfo &platform_info_temp); - void ParseUnzipOfAICoreSpec(std::map &ai_core_spec_map, - PlatformInfo &platform_info_temp); + void ParseUnzipOfAICoreSpec(map &ai_core_spec_map, PlatformInfo &platform_info_temp); - void ParseAICoreSpec(std::map &ai_core_spec_map, - PlatformInfo &platform_info_temp); + void ParseAICoreSpec(map &ai_core_spec_map, PlatformInfo &platform_info_temp); - void ParseBufferOfAICoreMemoryRates(std::map &ai_core_memory_rates_map, - PlatformInfo &platform_info_temp); + void ParseBufferOfAICoreMemoryRates(map &ai_core_memory_rates_map, PlatformInfo &platform_info_temp); - void ParseAICoreMemoryRates(std::map &ai_core_memory_rates_map, - PlatformInfo &platform_info_temp); + void ParseAICoreMemoryRates(map &ai_core_memory_rates_map, PlatformInfo &platform_info_temp); - void ParseUBOfAICoreMemoryRates(std::map &ai_core_memory_rates_map, - PlatformInfo &platform_info_temp); + void ParseUBOfAICoreMemoryRates(map &ai_core_memory_rates_map, PlatformInfo &platform_info_temp); - void ParseAICoreintrinsicDtypeMap(std::map &ai_coreintrinsic_dtype_map, - PlatformInfo &platform_info_temp); + void ParseAICoreintrinsicDtypeMap(map &ai_coreintrinsic_dtype_map, PlatformInfo &platform_info_temp); - void ParseVectorCoreSpec(std::map &vector_core_spec_map, - PlatformInfo &platform_info_temp); + void ParseVectorCoreSpec(map &vector_core_spec_map, PlatformInfo &platform_info_temp); - void ParseVectorCoreMemoryRates(std::map &vector_core_memory_rates_map, - PlatformInfo &platform_info_temp); + void ParseVectorCoreMemoryRates(map &vector_core_memory_rates_map, PlatformInfo &platform_info_temp); - void ParseCPUCache(std::map &CPUCacheMap, - PlatformInfo &platform_info_temp); + void ParseCPUCache(map &CPUCacheMap, PlatformInfo &platform_info_temp); - void ParseVectorCoreintrinsicDtypeMap(std::map &vector_coreintrinsic_dtype_map, + void ParseVectorCoreintrinsicDtypeMap(map &vector_coreintrinsic_dtype_map, PlatformInfo &platform_info_temp); - uint32_t ParsePlatformInfoFromStrToStruct(std::map> &content_info_map, - std::string &soc_version, + uint32_t ParsePlatformInfoFromStrToStruct(map> &content_info_map, string &soc_version, PlatformInfo &platform_info_temp); - void ParseVersion(std::map &version_map, - std::string &soc_version, - PlatFormInfos &platform_info_temp); - - void ParseSocInfo(std::map &soc_info_map, PlatFormInfos &platform_info_temp); - - void ParseCubeOfAICoreSpec(std::map &ai_core_spec_map, - PlatFormInfos &platform_info_temp); - - void ParseBufferOfAICoreSpec(std::map &ai_core_spec_map, - PlatFormInfos &platform_info_temp); - - void ParseUBOfAICoreSpec(std::map &ai_core_spec_map, - PlatFormInfos &platform_info_temp); - - void ParseUnzipOfAICoreSpec(std::map &ai_core_spec_map, - PlatFormInfos &platform_info_temp); - - void ParseAICoreSpec(std::map &ai_core_spec_map, - PlatFormInfos &platform_info_temp); - - void ParseBufferOfAICoreMemoryRates(std::map &ai_core_memory_rates_map, - PlatFormInfos &platform_info_temp); - - void ParseAICoreMemoryRates(std::map &ai_core_memory_rates_map, - PlatFormInfos &platform_info_temp); - - void ParseUBOfAICoreMemoryRates(std::map &ai_core_memory_rates_map, - PlatFormInfos &platform_info_temp); - - void ParseAICoreintrinsicDtypeMap(std::map &ai_coreintrinsic_dtype_map, - PlatFormInfos &platform_info_temp); - - void ParseVectorCoreSpec(std::map &vector_core_spec_map, - PlatFormInfos &platform_info_temp); - - void ParseVectorCoreMemoryRates(std::map &vector_core_memory_rates_map, - PlatFormInfos &platform_info_temp); - - void ParseCPUCache(std::map &CPUCacheMap, - PlatFormInfos &platform_info_temp); - - void ParseVectorCoreintrinsicDtypeMap(std::map &vector_coreintrinsic_dtype_map, - PlatFormInfos &platform_info_temp); - - uint32_t ParsePlatformInfo(std::map> &content_info_map, - std::string &soc_version, - PlatFormInfos &platform_info_temp); - - uint32_t AssemblePlatformInfoVector(std::map> &content_info_map); + uint32_t AssemblePlatformInfoVector(map> &content_info_map); private: bool init_flag_; - std::map platform_info_map_; + map platform_info_map_; OptionalInfo opti_compilation_info_; - std::map platform_infos_map_; - OptionalInfos opti_compilation_infos_; }; } // namespace fe #endif diff --git a/metadef/inc/common/util/platform_infos_def.h b/metadef/inc/common/util/platform_infos_def.h deleted file mode 100644 index be72061c..00000000 --- a/metadef/inc/common/util/platform_infos_def.h +++ /dev/null @@ -1,283 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PLATFORM_INFOS_DEF_H -#define PLATFORM_INFOS_DEF_H - -#include -#include -#include -#include -#include "platform_info_def.h" - -namespace fe { -class StrInfoImpl; -using StrInfoImplPtr = std::shared_ptr; -class StrInfos { - public: - bool Init(); - std::string GetAIcVersion(); - std::string GetCcecAIcVersion(); - std::string GetCcecAIvVersion(); - std::string IsSupportAICpuCompiler(); - - void SetAIcVersion(std::string &aic_version); - void SetCcecAIcVersion(std::string &ccec_aic_version); - void SetCcecAIvVersion(std::string &ccec_aiv_version); - void SetIsSupportAICpuCompiler(std::string &is_support_ai_cpu_compiler); - private: - StrInfoImplPtr str_info_impl_{nullptr}; -}; - -class SoCInfoImpl; -using SoCInfoImplPtr = std::shared_ptr; -class SoCInfos { - public: - bool Init(); - uint32_t GetAICoreCnt(); - uint32_t GetVectorCoreCnt(); - uint32_t GetAICpuCnt(); - MemoryType GetMemType(); - uint64_t GetMemSize(); - L2Type GetL2Type(); - uint64_t GetL2Size(); - uint32_t GetL2PageNum(); - - void SetAICoreCnt(uint32_t ai_core_cnt); - void SetVectorCoreCnt(uint32_t vector_core_cnt); - void SetAICpuCnt(uint32_t ai_cpu_cnt); - void SetMemType(MemoryType memory_type); - void SetMemSize(uint64_t memory_size); - void SetL2Type(L2Type l2_type); - void SetL2Size(uint64_t l2_size); - void SetL2PageNum(uint32_t l2_page_num); - private: - SoCInfoImplPtr soc_info_impl_{nullptr}; -}; - -class AICoreSpecImpl; -using AICoreSpecImplPtr = std::shared_ptr; -class AICoreSpecs { - public: - bool Init(); - double GetCubeFreq(); - uint64_t GetCubeMSize(); - uint64_t GetCubeNSize(); - uint64_t GetCubeKSize(); - uint64_t GetVecCalcSize(); - uint64_t GetL0aSize(); - uint64_t GetL0bSize(); - uint64_t GetL0cSize(); - uint64_t GetL1Size(); - uint64_t GetSmaskBuffer(); - uint64_t GetUBSize(); - uint64_t GetUBBlockSize(); - uint64_t GetUBBankSize(); - uint64_t GetUBBankNum(); - uint64_t GetUBBurstInOneBlock(); - uint64_t GetUBBankGroupNum(); - uint32_t GetUnzipEngines(); - uint32_t GetUnzipMaxRatios(); - uint32_t GetUnzipChannels(); - uint8_t GetUnzipIsTight(); - uint8_t GetCubeVectorSplit(); - - void SetCubeFreq(double cube_freq); - void SetCubeMSize(uint64_t cube_m_size); - void SetCubeNSize(uint64_t cube_n_size); - void SetCubeKSize(uint64_t cube_k_size); - void SetVecCalcSize(uint64_t vec_calc_size); - void SetL0aSize(uint64_t l0_a_size); - void SetL0bSize(uint64_t l0_b_size); - void SetL0cSize(uint64_t l0_c_size); - void SetL1Size(uint64_t l1_size); - void SetSmaskBuffer(uint64_t smask_buffer); - void SetUBSize(uint64_t ub_size); - void SetUBBlockSize(uint64_t ubblock_size); - void SetUBBankSize(uint64_t ubbank_size); - void SetUBBankNum(uint64_t ubbank_num); - void SetUBBurstInOneBlock(uint64_t ubburst_in_one_block); - void SetUBBankGroupNum(uint64_t ubbank_group_num); - void SetUnzipEngines(uint32_t unzip_engines); - void SetUnzipMaxRatios(uint32_t unzip_max_ratios); - void SetUnzipChannels(uint32_t unzip_channels); - void SetUnzipIsTight(uint8_t unzip_is_tight); - void SetCubeVectorSplit(uint8_t cube_vector_split); - private: - AICoreSpecImplPtr aicore_spec_impl_{nullptr}; -}; - -class AICoreMemRateImpl; -using AICoreMemRateImplPtr = std::shared_ptr; -class AICoreMemRates { - public: - bool Init(); - double GetDdrRate(); - double GetDdrReadRate(); - double GetDdrWriteRate(); - double GetL2Rate(); - double GetL2ReadRate(); - double GetL2WriteRate(); - double GetL1ToL0aRate(); - double GetL1ToL0bRate(); - double GetL1ToUBRate(); - double GetL0cToUBRate(); - double GetUBToL2Rate(); - double GetUBToDdrRate(); - double GetUBToL1Rate(); - - void SetDdrRate(double ddr_rate); - void SetDdrReadRate(double ddr_read_rate); - void SetDdrWriteRate(double ddr_write_rate); - void SetL2Rate(double l2_rate); - void SetL2ReadRate(double l2_read_rate); - void SetL2WriteRate(double l2_write_rate); - void SetL1ToL0aRate(double l1_to_l0_a_rate); - void SetL1ToL0bRate(double l1_to_l0_b_rate); - void SetL1ToUBRate(double l1_to_ub_rate); - void SetL0cToUBRate(double l0_c_to_ub_rate); - void SetUBToL2Rate(double ub_to_l2_rate); - void SetUBToDdrRate(double ub_to_ddr_rate); - void SetUBToL1Rate(double ub_to_l1_rate); - private: - AICoreMemRateImplPtr aicore_mem_rate_impl_{nullptr}; -}; - -class VectorCoreSpecImpl; -using VectorCoreSpecImplPtr = std::shared_ptr; -class VectorCoreSpecs { - public: - bool Init(); - double GetVecFreq(); - uint64_t GetVecCalcSize(); - uint64_t GetSmaskBuffer(); - uint64_t GetUBSize(); - uint64_t GetUBBlockSize(); - uint64_t GetUBBankSize(); - uint64_t GetUBBankNum(); - uint64_t GetUBBurstInOneBlock(); - uint64_t GetUBBankGroupNum(); - uint64_t GetVectorRegSize(); - uint64_t GetPredicateRegSize(); - uint64_t GetAddressRegSize(); - uint64_t GetAlignmentRegSize(); - - void SetVecFreq(double vec_freq); - void SetVecCalcSize(uint64_t vec_calc_size); - void SetSmaskBuffer(uint64_t smask_buffer); - void SetUBSize(uint64_t ub_size); - void SetUBBlockSize(uint64_t ubblock_size); - void SetUBBankSize(uint64_t ubbank_size); - void SetUBBankNum(uint64_t ubbank_num); - void SetUBBurstInOneBlock(uint64_t ubburst_in_one_block); - void SetUBBankGroupNum(uint64_t ubbank_group_num); - void SetVectorRegSize(uint64_t vector_reg_size); - void SetPredicateRegSize(uint64_t predicate_reg_size); - void SetAddressRegSize(uint64_t address_reg_size); - void SetAlignmentRegSize(uint64_t alignment_reg_size); - private: - VectorCoreSpecImplPtr vector_core_spec_impl_{nullptr}; -}; - -class VectorCoreMemRateImpl; -using VectorCoreMemRateImplPtr = std::shared_ptr; -class VectorCoreMemRates { - public: - bool Init(); - double GetDdrRate(); - double GetDdrReadRate(); - double GetDdrWriteRate(); - double GetL2Rate(); - double GetL2ReadRate(); - double GetL2WriteRate(); - double GetUBToL2Rate(); - double GetUBToDdrRate(); - - void SetDdrRate(double ddr_rate); - void SetDdrReadRate(double ddr_read_rate); - void SetDdrWriteRate(double ddr_write_rate); - void SetL2Rate(double l2_rate); - void SetL2ReadRate(double l2_read_rate); - void SetL2WriteRate(double l2_write_rate); - void SetUBToL2Rate(double ub_to_l2_rate); - void SetUBToDdrRate(double ub_to_ddr_rate); - private: - VectorCoreMemRateImplPtr vector_core_mem_rate_impl_{nullptr}; -}; - -class CPUCacheImpl; -using CPUCacheImplPtr = std::shared_ptr; -class CPUCaches { - public: - bool Init(); - uint32_t GetAICPUSyncBySW(); - uint32_t GetTSCPUSyncBySW(); - - void SetAICPUSyncBySW(uint32_t AICPUSyncBySW); - void SetTSCPUSyncBySW(uint32_t TSCPUSyncBySW); - private: - CPUCacheImplPtr cpu_cache_impl_{nullptr}; -}; - -class PlatFormInfosImpl; -using PlatFormInfosImplPtr = std::shared_ptr; -class PlatFormInfos { - public: - bool Init(); - StrInfos GetStrInfo(); - SoCInfos GetSocInfo(); - AICoreSpecs GetAICoreSpec(); - AICoreMemRates GetAICoreMemRates(); - std::map> GetAICoreIntrinsicDtype(); - VectorCoreSpecs GetVectorCoreSpec(); - VectorCoreMemRates GetVectorCoreMemRates(); - CPUCaches GetCPUCache(); - std::map> GetVectorCoreIntrinsicDtype(); - - void SetStrInfo(StrInfos &str_infos); - void SetSocInfo(SoCInfos &SoC_infos); - void SetAICoreSpec(AICoreSpecs &AICore_specs); - void SetAICoreMemRates(AICoreMemRates &AICore_mem_rates); - void SetAICoreIntrinsicDtype(std::map> &intrinsic_dtypes); - void SetVectorCoreSpec(VectorCoreSpecs &vector_core_specs); - void SetVectorCoreMemRates(VectorCoreMemRates &vectorcore_mem_rates); - void SetCPUCache(CPUCaches &CPU_caches); - void SetVectorCoreIntrinsicDtype(std::map> &intrinsic_dtypes); - - private: - PlatFormInfosImplPtr platform_infos_impl_{nullptr}; -}; - -class OptionalInfosImpl; -using OptionalInfosImplPtr = std::shared_ptr; -class OptionalInfos { - public: - bool Init(); - std::string GetSocVersion(); - std::string GetCoreType(); - uint32_t GetAICoreNum(); - std::string GetL1FusionFlag(); - - void SetSocVersion(std::string soc_version); - void SetCoreType(std::string core_type); - void SetAICoreNum(uint32_t ai_core_num); - void SetL1FusionFlag(std::string l1_fusion_flag); - private: - OptionalInfosImplPtr optional_infos_impl_{nullptr}; -}; - -} -#endif diff --git a/metadef/inc/graph/debug/ge_attr_define.h b/metadef/inc/graph/debug/ge_attr_define.h index 6796604a..9e9ffeee 100644 --- a/metadef/inc/graph/debug/ge_attr_define.h +++ b/metadef/inc/graph/debug/ge_attr_define.h @@ -1045,7 +1045,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_TBE_KERNEL_BUFFER; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_SLICE; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NEED_RECOVER_ATTR; - // used for memory allocate GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_MEM_TYPE_LIST; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OUTPUT_MEM_TYPE_LIST; diff --git a/metadef/inc/register/proto/op_mapping_info.proto b/metadef/inc/register/proto/op_mapping_info.proto index 7fb6f84b..e23b7ebe 100644 --- a/metadef/inc/register/proto/op_mapping_info.proto +++ b/metadef/inc/register/proto/op_mapping_info.proto @@ -15,7 +15,6 @@ message Output { int32 original_output_data_type = 7; int32 original_output_format = 8; uint64 size = 9; - Shape origin_shape = 10; } message Input { @@ -24,7 +23,6 @@ message Input { Shape shape = 3; uint64 address = 4; uint64 size = 5; - Shape origin_shape = 6; } enum BufferType { diff --git a/metadef/third_party/graphengine/inc/external/ge/ge_api_types.h b/metadef/third_party/graphengine/inc/external/ge/ge_api_types.h index 4d80ab13..104f05a6 100644 --- a/metadef/third_party/graphengine/inc/external/ge/ge_api_types.h +++ b/metadef/third_party/graphengine/inc/external/ge/ge_api_types.h @@ -406,19 +406,13 @@ const std::set ir_builder_suppported_options = {INPUT_FORMAT, OP_BANK_PATH_FLAG}; // for interface: aclgrphParse -const std::set ir_parser_suppported_options = {INPUT_FORMAT, - INPUT_SHAPE, - OP_NAME_MAP, - IS_DYNAMIC_INPUT, - INPUT_FP16_NODES, +const std::set ir_parser_suppported_options = {INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT, - OUTPUT_TYPE, OUT_NODES, COMPRESS_WEIGHT_CONF, - ENABLE_SCOPE_FUSION_PASSES, - LOG_LEVEL}; + ENABLE_SCOPE_FUSION_PASSES}; // for interface: aclgrphBuildInitialize const std::set global_options = {CORE_TYPE, diff --git a/metadef/third_party/graphengine/inc/framework/omg/parser/model_parser.h b/metadef/third_party/graphengine/inc/framework/omg/parser/model_parser.h index 20bfcef4..240bd839 100644 --- a/metadef/third_party/graphengine/inc/framework/omg/parser/model_parser.h +++ b/metadef/third_party/graphengine/inc/framework/omg/parser/model_parser.h @@ -65,6 +65,18 @@ class ModelParser { virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; /** + * @ingroup domi_omg + * @brief Parse relevant data from memory and save it to graph + * @param [in] input Model file memory data + * @param [in] input Model file memory size + * @param [in|out] graph A graph for saving the model information after analysis + * @return SUCCESS + * @return FAILED + * @author + */ + virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0; + + /** * @ingroup domi_omg * @brief Analyze network model data * @param [in] proto network model diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine.h b/third_party/fwkacllib/inc/cce/aicpu_engine.h index 8bf0bdb6..b83731a8 100644 --- a/third_party/fwkacllib/inc/cce/aicpu_engine.h +++ b/third_party/fwkacllib/inc/cce/aicpu_engine.h @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #ifndef AICPU_ENGINE_H__ #define AICPU_ENGINE_H__ diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h index e491d43f..972f470c 100644 --- a/third_party/fwkacllib/inc/hccl/hcom.h +++ b/third_party/fwkacllib/inc/hccl/hcom.h @@ -40,15 +40,6 @@ extern "C" { * @param rankSize A pointer identifying the rank number. * @return HcclResult */ -HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); - -/** - * @brief Get the rank number in the group. - * - * @param group A string identifying the group name. - * @param rankSize A pointer identifying the rank number. - * @return HcclResult - */ HcclResult HcomGetRankSize(const char *group, u32 *rankSize); /** @@ -58,15 +49,6 @@ HcclResult HcomGetRankSize(const char *group, u32 *rankSize); * @param localRankSize A pointer identifying the rank number. * @return HcclResult */ -HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); - -/** - * @brief Get the rank number of this rank's server within the group. - * - * @param group A string identifying the group name. - * @param localRankSize A pointer identifying the rank number. - * @return HcclResult - */ HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize); /** @@ -76,15 +58,6 @@ HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize); * @param rankId A pointer identifying the rank id. * @return HcclResult */ -HcclResult hcom_get_rank_id(const char *group, u32 *rankId); - -/** - * @brief Get the rank id of this rank. - * - * @param group A string identifying the group name. - * @param rankId A pointer identifying the rank id. - * @return HcclResult - */ HcclResult HcomGetRankId(const char *group, u32 *rankId); /** @@ -94,15 +67,6 @@ HcclResult HcomGetRankId(const char *group, u32 *rankId); * @param localRankId A pointer identifying the local rank id. * @return HcclResult */ -HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); - -/** - * @brief Get the local rank id of this rank's server within the group. - * - * @param group A string identifying the group name. - * @param localRankId A pointer identifying the local rank id. - * @return HcclResult - */ HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId); /** @@ -113,16 +77,6 @@ HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId); * @param worldRank A pointer identifying the world rank id. * @return HcclResult */ -HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); - -/** - * @brief Get the world rank id according to the group rank id. - * - * @param group A string identifying the group name. - * @param groupRank An integer(u32) identifying the group rank id. - * @param worldRank A pointer identifying the world rank id. - * @return HcclResult - */ HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank); /** @@ -133,16 +87,6 @@ HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 * * @param groupRank A pointer identifying the group rank id. * @return HcclResult */ -HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank); - -/** - * @brief Get the group rank id according to the world rank id. - * - * @param worldRank An integer(u32) identifying the world rank id. - * @param group A string identifying the group name. - * @param groupRank A pointer identifying the group rank id. - * @return HcclResult - */ HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank); /** @@ -153,16 +97,6 @@ HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 * * @param rankIds A list identifying the ranks in the group. * @return HcclResult */ -HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); - -/** - * @brief Create group. - * - * @param group A string identifying the group name. - * @param rankNum An integer(u32) identifying the number of ranks in the group. - * @param rankIds A list identifying the ranks in the group. - * @return HcclResult - */ HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds); /** @@ -171,14 +105,6 @@ HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds); * @param group A string identifying the group name. * @return HcclResult */ -HcclResult hcom_destroy_group(const char *group); - -/** - * @brief Destroy group - * - * @param group A string identifying the group name. - * @return HcclResult - */ HcclResult HcomDestroyGroup(const char *group); /** @@ -189,46 +115,54 @@ HcclResult HcomDestroyGroup(const char *group); * @param IdxList A list identifying the index of end gradient in each segment. * @return HcclResult */ -extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); +extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); /** - * @brief Set the gradient split strategy with in the group, according to gradient index. + * @brief Set the gradient split strategy with in the group, according to gradient data size. * * @param group A string identifying the group name. * @param segmentNum An integer(u32) identifying the segments number of gradients. - * @param IdxList A list identifying the index of end gradient in each segment. + * @param sizeList A list identifying the percent of each segment. * @return HcclResult */ -extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); +extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); /** - * @brief Set the gradient split strategy with in the group, according to gradient data size. + * @brief Initialize hcom executor. * - * @param group A string identifying the group name. - * @param segmentNum An integer(u32) identifying the segments number of gradients. - * @param sizeList A list identifying the percent of each segment. + * @param void * @return HcclResult */ -extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); +HcclResult HcomExecInitialize(); /** - * @brief Set the gradient split strategy with in the group, according to gradient data size. + * @brief Finalize hcom executor. * - * @param group A string identifying the group name. - * @param segmentNum An integer(u32) identifying the segments number of gradients. - * @param sizeList A list identifying the percent of each segment. + * @param void * @return HcclResult */ -extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); +HcclResult HcomExecFinalize(); /** - * @brief Register memories and init resources for remote access. + * @brief Put collective communication operation into hcom executor. * - * @param addrList memory addresses for remote access. - * @param count number of remote memory addresses. + * @param opInfo information about collective communication operation. + * @param callback callback after collective communication operation. * @return HcclResult */ -extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count); +HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function callback); + +/** + * @brief Put remote access operation into hcom executor. + * + * @param remoteAccessType operation type (read or write). + * @param addrInfos address information about collective communication operation. + * @param callback callback after collective communication operation. + * @return HcclResult + */ +HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, + const std::vector& addrInfos, + std::function callback); /** * @brief Register memories and init resources for remote access. @@ -239,16 +173,6 @@ extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrLis */ extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); -HcclResult HcomExecInitialize(); - -HcclResult HcomExecFinalize(); - -HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function callback); - -HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, - const std::vector& addrInfos, - std::function callback); - #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index 66638bbb..993f36ba 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -279,8 +279,10 @@ typedef struct { #define M_NAME_MAX MAX_FNAME #define M_F_OK F_OK -#define M_R_OK R_OK +#define M_X_OK X_OK #define M_W_OK W_OK +#define M_R_OK R_OK + #define MM_DT_DIR DT_DIR #define MM_DT_REG DT_REG diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index aa58e722..ecc86bf8 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -322,6 +322,7 @@ typedef VOID (*mmPf)(VOID); #define M_NAME_MAX _MAX_FNAME #define M_F_OK 0 +#define M_X_OK 1 #define M_W_OK 2 #define M_R_OK 4 diff --git a/third_party/fwkacllib/inc/ops/aipp.h b/third_party/fwkacllib/inc/ops/aipp.h index bed984bd..86805f72 100644 --- a/third_party/fwkacllib/inc/ops/aipp.h +++ b/third_party/fwkacllib/inc/ops/aipp.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -65,6 +65,8 @@ in aipp config file, framework will auto add one input node to graph at last. \n *@par Third-party framework compatibility *Compatible with the TensorFlow operator AippData. +*@par Restrictions: +*Warning: This operator can be integrated only by configuring INSERT_OP_FILE of aclgrphBuildModel. Please do not use it directly. */ REG_OP(AippData) .INPUT(data, TensorType::ALL()) diff --git a/third_party/fwkacllib/inc/ops/all_ops.h b/third_party/fwkacllib/inc/ops/all_ops.h index 1ac83783..614b06e2 100644 --- a/third_party/fwkacllib/inc/ops/all_ops.h +++ b/third_party/fwkacllib/inc/ops/all_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index e1f64421..19147d64 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1153,6 +1153,77 @@ REG_OP(EditDistance) .OUTPUT(output, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(EditDistance) +/** +* @brief sort_v2. + +* @par Inputs: +* @li x: An ND tensor of type float16. + +* @par Attributes: + +* @li axis: An optional int. The dimension to sort along. This value defaults to -1. +* @li descending: An optional bool. Controls the sorting order (ascending or descending). This value defaults to False. + +* @par Outputs: +* @li y: An ND tensor of type float16. + +* @attention Constraints: +* @li Axis should select the last dim. +* @li The upper limit of data on Ascend310 is 500K, and that on Ascend910 is 2000K. +*/ +REG_OP(SortV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(axis, Int, -1) + .ATTR(descending, Bool, false) + .OP_END_FACTORY_REG(SortV2) + +/** +* @brief Expand the input tensor to a compatible shape. \n + +* @par Inputs: +* One inputs, including: +* @li x: A Tensor. Must be one of the following types: +* float16, float32, int32, int8 ,uint8. \n +* @li shape: A Tensor to specify the shape that the input tensor expanded to. \n + +* @par Outputs: +* @li y: A Tensor. Has the same type as "x", and the shape specified by input and attr shape \n + +* @par Third-party framework compatibility +* Compatible with the ONNX operator Expand. +*/ + +REG_OP(Expand) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .INPUT(shape, TensorType({DT_INT16, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .OP_END_FACTORY_REG(Expand) + +/** +* @brief Expand the input tensor to a compatible shape. \n + +* @par Inputs: +* One inputs, including: +* @li x: A Tensor. Must be one of the following types: +* float16, float32, int32, int8 ,uint8. \n + +* @par Attributes: +* @li shape: A required listInt to specify the shape that the input tensor expanded to. \n + + +* @par Outputs: +* @li y: A Tensor. Has the same type as "x", and the shape specified by input and attr shape \n + +* @par Third-party framework compatibility +* Compatible with the ONNX operator Expand. +*/ + +REG_OP(ExpandD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .REQUIRED_ATTR(shape, ListInt) + .OP_END_FACTORY_REG(ExpandD) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/audio_ops.h b/third_party/fwkacllib/inc/ops/audio_ops.h index d9883253..f05135d1 100644 --- a/third_party/fwkacllib/inc/ops/audio_ops.h +++ b/third_party/fwkacllib/inc/ops/audio_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/batch_ops.h b/third_party/fwkacllib/inc/ops/batch_ops.h index 8a1c5a7b..a4786cd3 100644 --- a/third_party/fwkacllib/inc/ops/batch_ops.h +++ b/third_party/fwkacllib/inc/ops/batch_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/bitwise_ops.h b/third_party/fwkacllib/inc/ops/bitwise_ops.h index 5c83e161..39a28cf3 100644 --- a/third_party/fwkacllib/inc/ops/bitwise_ops.h +++ b/third_party/fwkacllib/inc/ops/bitwise_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h index 550e8b7d..08e54824 100644 --- a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h +++ b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h index e20607bf..890c52ae 100644 --- a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h +++ b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/condtake_ops.h b/third_party/fwkacllib/inc/ops/condtake_ops.h index 5e91eb07..029cffbf 100644 --- a/third_party/fwkacllib/inc/ops/condtake_ops.h +++ b/third_party/fwkacllib/inc/ops/condtake_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/control_flow_ops.h b/third_party/fwkacllib/inc/ops/control_flow_ops.h index 7196b14f..c0b6ad72 100644 --- a/third_party/fwkacllib/inc/ops/control_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/control_flow_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/ctc_ops.h b/third_party/fwkacllib/inc/ops/ctc_ops.h index 2c75fd09..c6a265cc 100644 --- a/third_party/fwkacllib/inc/ops/ctc_ops.h +++ b/third_party/fwkacllib/inc/ops/ctc_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h index bb937a75..18a79289 100644 --- a/third_party/fwkacllib/inc/ops/data_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index c64bc138..81d2743d 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -122,7 +122,8 @@ REG_OP(MinimumGrad) *@par Inputs: *One input: *x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, - int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n + int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. + For float32 type, the actual calculation on the chip is based on float16. \n *@par Attributes: *dst_type: An required attribute of type int32, specifying the dst data type. \n @@ -611,6 +612,15 @@ REG_OP(Log1p) *@par Outputs: *y: A Tensor. Has the same type as "x1". + +*@attention Constraints: +*@li x2: The input data does not support 0 +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*requirement of double thousandths in the mini form +*@li Due to different architectures, the calculation results of this operator +*on NPU and CPU may be inconsistent +*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 + *@par Third-party framework compatibility *Compatible with the TensorFlow operator Mod. */ @@ -2042,6 +2052,15 @@ REG_OP(FloorDiv) * *@par Outputs: *y: Result remainder. + +*@attention Constraints: +*@li x2: The input data does not support 0 +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*requirement of double thousandths in the mini form +*@li Due to different architectures, the calculation results of this operator +*on NPU and CPU may be inconsistent +*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 + *@par Third-party framework compatibility * Compatible with the TensorFlow operator FloorMod. */ @@ -2168,6 +2187,14 @@ REG_OP(Tan) *@par Outputs: *y: A Tensor. Has the same type as "x1". \n +*@attention Constraints: +*@li x2: The input data does not support 0 +*@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the +*requirement of double thousandths in the mini form +*@li Due to different architectures, the calculation results of this operator +*on NPU and CPU may be inconsistent +*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 + *@par Third-party framework compatibility *@li Compatible with the TensorFlow operator TruncateMod. */ @@ -2829,9 +2856,9 @@ REG_OP(AdamApplyOneAssign) *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LambApplyOptimizerAssign) - .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(mul0_x, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(mul1_x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2842,6 +2869,8 @@ REG_OP(LambApplyOptimizerAssign) .INPUT(do_use_weight, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(weight_decay_rate, TensorType({DT_FLOAT16,DT_FLOAT})) .OUTPUT(output0, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(inputv, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(inputm, TensorType({DT_FLOAT16,DT_FLOAT})) .OP_END_FACTORY_REG(LambApplyOptimizerAssign) /** @@ -2873,7 +2902,8 @@ REG_OP(LambApplyWeightAssign) .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input2, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input3, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(input4, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(input_param, TensorType({DT_FLOAT16,DT_FLOAT})) .OP_END_FACTORY_REG(LambApplyWeightAssign) /** @@ -3329,8 +3359,248 @@ REG_OP(TensorRedirect) .OUTPUT(output_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_INT64, DT_INT16, DT_UINT16, DT_UINT64, DT_UINT32})) .OP_END_FACTORY_REG(TensorRedirect) -} // namespace ge +/** +* @brief Performs the element-wise division of tensor x2 by tensor x3, +* multiply the result by the scalar value and add it to tensor x1 + +* @par Inputs: +* Three inputs, including: +* @li input_data: A mutable input Tensor. Must be one of the following types: +* float16, float32. +* @li x1: A mutable input Tensor of the same type as x1. +* @li x2: A mutable input Tensor of the same type as x1. +* @li value: A mutable input Tensor. Must be one of the following types: +* float16, float32, int32. \n + +* @par Outputs: +* @li y: A mutable Tensor. Has the same type as "x1". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Addcdiv. +*/ +REG_OP(Addcdiv) + .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32 })) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(Addcdiv) + +/** +* @brief Performs the element-wise multiplication of tensor x2 by tensor x3, +* multiply the result by the scalar value and add it to tensor input_data + + +* @par Inputs: +* Three inputs, including: +* @li input_data: A mutable input Tensor. Must be one of the following types: +* float16, float32, int8, int32, uint8. +* @li x1: A mutable input Tensor of the same type as x1. +* @li x2: A mutable input Tensor of the same type as x1. +* @li value: A tensor which includes only one element of the same type as x1. \n + +* @par Outputs: +* @li y: A mutable output Tensor. Has the same type as "x1". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Addcmul. +*/ +REG_OP(Addcmul) + .INPUT(input_data, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) + .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) + .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) + .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) + .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) + .OP_END_FACTORY_REG(Addcmul) + +/** +* @brief Computes the result of x2 * alpha + x1. + +* @par Inputs: +* @li x1: An ND tensor of type float16, float32, int32. +* @li x2: An ND tensor of type float16, float32, int32. +* @li alpha: A scalar tensor of type float16, float32. \n + +* @par Outputs: +* @li y: An ND tensor tensor with the same shape and type as "x1". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Axpy. +*/ +REG_OP(AxpyV2) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OP_END_FACTORY_REG(AxpyV2) + +/** +* @brief Computes the result of x1 + x2. + +* @par Inputs: +* @li x1: An ND tensor of type float16, float, int32. +* @li x2: An ND tensor of type float16, float, int32. \n + +* @par Outputs: +* @li y: An ND tensor tensor with the same type as "x1". \n +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Add. +*/ +REG_OP(PtAdd) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OP_END_FACTORY_REG(PtAdd) + +/** +* @brief Computes the result of x1 * x2. + +* @par Inputs: +* @li x1: An ND tensor of type float16, float32, int32. +* @li x2: An ND tensor of type float16, float32, int32. \n + +* @par Outputs: +* @li y: Same shape and type as the largest ND tensor in x1 x2. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator muls. +*/ +REG_OP(PtMuls) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OP_END_FACTORY_REG(PtMuls) + +/** +* @brief Computes the result of x1 - x2. + +* @par Inputs: +* @li x1: An ND tensor of type float16, float, int32. +* @li x2: An ND tensor of type float16, float, int32. \n + +* @par Outputs: +* @li y: An ND tensor tensor with the same type as "x1". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Sub. +*/ +REG_OP(PtSub) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OP_END_FACTORY_REG(PtSub) + +/** +* @brief Add the partial values of two tensors in format NC1HWC0. + +* @par Inputs: +* @li x1: A Tensor in 5HD, and must be one of the following types: float16, +* float32. \n +* @li x2: A Tensor of the same type as "x1", and the same shape as "x1", +* except for the C1 value. \n + +* @par Attributes: +* @li x1_c1_offset: A required int. Offset value of C1 in "x1". \n +* @li x2_c1_offset: A required int. Offset value of C1 in "x2". \n +* @li c1_len: A required int. C1 len of "y". The value must be less than +* the difference between C1 and offset in "x1" and "x2". \n + +* @par Outputs: +* @li y: A Tensor of the same type as "x1", and the same shape as "x1", +* except for the C1 value. Record the result after adding. \n +*/ +REG_OP(StrideAdd) + .INPUT(x1, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(x2, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .REQUIRED_ATTR(x1_c1_offset, Int) + .REQUIRED_ATTR(x2_c1_offset, Int) + .REQUIRED_ATTR(c1_len, Int) + .OP_END_FACTORY_REG(StrideAdd) + +/** +* @brief Compare two tensors are totally equal or not, only output a bool value" + +* @par Inputs: +* Two inputs, including: +* @li input_x: A Tensor. the first tensor. \n +* @li input_y: A Tensor. the second tensor. \n + +* @par Outputs: +* @li output_z: A Tensor. Bool type, compare result of the two inputs. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch equal operator. \n +*/ +REG_OP(TensorEqual) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .OUTPUT(output_z, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(TensorEqual) + +/** + * @brief Element-wise min of each of the input tensors (with Numpy-style broadcasting support). + * All inputs and outputs must have the same data type. This operator supports multidirectional + * (i.e., Numpy-style) broadcasting + * + * @par inputs + * one input including: + * @li x: dynamic input A Tensor. Must be one of the following types: float32, float16, double, int32, int64 + * + * @par output + * one output including: + * @li y:A Tensor of the same type as x + * + */ +REG_OP(MaxN) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, DT_INT32, DT_INT64})) + .OP_END_FACTORY_REG(MaxN) + +/** + * @brief Element-wise min of each of the input tensors (with Numpy-style broadcasting support). + * All inputs and outputs must have the same data type. This operator supports multidirectional + * (i.e., Numpy-style) broadcasting + * + * @par inputs + * one input including: + * @li x: dynamic input A Tensor. Must be one of the following types: float32, float16, double, int32, int64 + * + * @par output + * one output including: + * @li y:A Tensor of the same type as x + * + */ +REG_OP(MinN) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, + DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64, + DT_INT32, DT_INT64})) + .OP_END_FACTORY_REG(MinN) + +/** + * @brief Calculates x * maske * value. + * + * @par Inputs: + * @li x: An tensor of type float16 or float32, specifying the input to the data layer. + * @li mask: An tensor of type int8 or float16 or float32, be same shape with x. \n + * + * @par Attributes: + * value: A optional float. \n + * + * @par Outputs: + * y: The output tensor of type float16 or float32. + @ li y:A Tensor of the same type and shape as x + * + */ +REG_OP(MaskedScale) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32})) + .INPUT(mask, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) + .REQUIRED_ATTR(value, Float) + .OP_END_FACTORY_REG(MaskedScale) +} // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/ops/functional_ops.h index 598d3ad3..b09ac058 100644 --- a/third_party/fwkacllib/inc/ops/functional_ops.h +++ b/third_party/fwkacllib/inc/ops/functional_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/get_data_ops.h b/third_party/fwkacllib/inc/ops/get_data_ops.h index 33dc4f14..e5518ef8 100644 --- a/third_party/fwkacllib/inc/ops/get_data_ops.h +++ b/third_party/fwkacllib/inc/ops/get_data_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/hcom_ops.h b/third_party/fwkacllib/inc/ops/hcom_ops.h index b90b225e..cb9fbe22 100644 --- a/third_party/fwkacllib/inc/ops/hcom_ops.h +++ b/third_party/fwkacllib/inc/ops/hcom_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,8 +45,6 @@ REG_OP(HcomAllGather) .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) .REQUIRED_ATTR(rank_size, Int) .REQUIRED_ATTR(group, String) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) .OP_END_FACTORY_REG(HcomAllGather) /** @@ -77,8 +75,6 @@ REG_OP(HcomAllReduce) .REQUIRED_ATTR(group, String) .ATTR(fusion, Int, 1) .ATTR(fusion_id, Int, -1) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) .OP_END_FACTORY_REG(HcomAllReduce) /** @@ -91,7 +87,7 @@ REG_OP(HcomAllReduce) input of this rank will be broadcast to other ranks. * @li fusion: A required integer identifying if the op need to fusion,the default value is none fusion - * @li fusion: A required integer identifying the fusion id if para fusion + * @li fusion_id: A required integer identifying the fusion id if para fusion is set. * @li group: A required string identifying the group name of ranks participating in the op. @@ -109,11 +105,40 @@ REG_OP(HcomBroadcast) .REQUIRED_ATTR(group, String) .ATTR(fusion, Int, 0) .ATTR(fusion_id, Int, -1) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) .OP_END_FACTORY_REG(HcomBroadcast) /** + * @brief preforms reduction from others rank to rootrank + * @par Inputs: +* @li root_rank: A required integer identifying the root rank in the op + the reduction result will be on this root rank + * x: A tensor. Must be one of the following types: int8, int16, int32, float16, + float32. + * @par Attributes: + * @li reduction: A required string identifying the reduction operation to + perform.The supported operation are: "sum", "max", "min", "prod". + * @li group: A required string identifying the group name of ranks + participating in the op. + * @li fusion: An optional integer identifying the fusion flag of the op. + 0: no fusion; 1 (default): fusion; 2: fusion the ops by fusion id. + * @li fusion_id: An optional integer identifying the fusion id of the op. + * The HcomReduce ops with the same fusion id will be fused. + * @par Outputs: + * y: A Tensor. Has the same type as "x". + * @attention Constraints: + *"group" is limited to 128 characters. Use "hccl_world_group" + as the name of a world group. + */ +REG_OP(HcomReduce) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) + .REQUIRED_ATTR(root_rank, Int) + .REQUIRED_ATTR(reduction, String) + .REQUIRED_ATTR(group, String) + .ATTR(fusion, Int, 0) + .ATTR(fusion_id, Int, -1) + .OP_END_FACTORY_REG(HcomReduce) +/** * @brief Performs reduction across all input tensors, scattering in equal blocks among ranks, each rank getting a chunk of data based on its rank index. @@ -139,8 +164,6 @@ REG_OP(HcomReduceScatter) .REQUIRED_ATTR(reduction, String) .REQUIRED_ATTR(group, String) .REQUIRED_ATTR(rank_size, Int) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) .OP_END_FACTORY_REG(HcomReduceScatter) /** @@ -167,8 +190,6 @@ REG_OP(HcomSend) .REQUIRED_ATTR(group, String) .REQUIRED_ATTR(sr_tag, Int) .REQUIRED_ATTR(dest_rank, Int) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) .OP_END_FACTORY_REG(HcomSend) /** @@ -202,8 +223,6 @@ REG_OP(HcomReceive) .REQUIRED_ATTR(src_rank, Int) .REQUIRED_ATTR(shape, ListInt) .REQUIRED_ATTR(dtype, Type) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.0) .OP_END_FACTORY_REG(HcomReceive) /** diff --git a/third_party/fwkacllib/inc/ops/hvd_ops.h b/third_party/fwkacllib/inc/ops/hvd_ops.h index a49ec5ed..00299ef7 100644 --- a/third_party/fwkacllib/inc/ops/hvd_ops.h +++ b/third_party/fwkacllib/inc/ops/hvd_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index ce3262f9..726cd0e5 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,11 +31,12 @@ namespace ge { *@par Inputs: *Input images is a tensor of at least 3 dimensions. The last dimension is interpretted as channels, and must be three. Inputs include: -*@li images:A Tensor of type float. Images to adjust. At least 3-D. +*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format +must be NHWC. *@li delta:A Tensor of type float. A float delta to add to the hue . \n *@par Outputs: -*y:A Tensor of type float . \n +*y:A Tensor of type float. The format must be NHWC. \n *@attention Constraints: *Input images is a tensor of at least 3 dimensions. The last dimension is @@ -57,11 +58,12 @@ REG_OP(AdjustHue) *@par Inputs: *Input images is a tensor of at least 3 dimensions. The last dimension is interpretted as channels, and must be three. Inputs include: -*@li images:A Tensor of type float. Images to adjust. At least 3-D. +*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format +must be NHWC. *@li scale:A Tensor of type float. A float scale to add to the saturation . \n *@par Outputs: -*y:A Tensor of type float . \n +*y:A Tensor of type float. The format must be NHWC. \n *@attention Constraints: *Input images is a tensor of at least 3 dimensions. The last dimension is @@ -83,11 +85,12 @@ REG_OP(AdjustSaturation) *@par Inputs: *Input images is a tensor of at least 3 dimensions. The last 3 dimensions are interpreted as '[height, width, channels]'. Inputs include: -*@li images:A Tensor of type float. Images to adjust. At least 3-D. +*@li images:A Tensor of type float. Images to adjust. At least 3-D. The format +must be NHWC. *@li scale:A Tensor of type float. A float multiplier for adjusting contrast . \n *@par Outputs: -*y:A Tensor of type float . \n +*y:A Tensor of type float. The format must be NHWC. \n *@attention Constraints: *Input images is a tensor of at least 3 dimensions. The last dimension is @@ -112,7 +115,7 @@ nearest neighbor sampling to a common output size specified by crop_size . \n *Input images must be a 4-D tensor. Inputs include: *@li images:A Tensor. Must be one of the following types:uint8, uint16, int8, int16, int32, int64, float16, float, double. A 4-D tensor of shape -[batch, image_height, image_width, depth]. +[batch, image_height, image_width, depth]. The format must be NHWC. *@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch). @@ -127,7 +130,7 @@ extrapolation, when applicable. NearestNeighbor . \n *@par Outputs: -*y:A Tensor of type float . \n +*y:A Tensor of type float. The format must be NHWC. \n *@attention Constraints: *Input images must be a 4-D tensor . \n @@ -193,7 +196,9 @@ boxes tensor . \n *@par Inputs: *Input images and grads must be a 4-D tensor. Inputs include: *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. +The format must be NHWC. *@li images: A 4-D tensor of shape [batch, image_height, image_width, depth]. +The format must be NHWC. Both image_height and image_width need to be positive. *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor specifies the coordinates of a box in the box_ind[i] image and is specified in @@ -233,6 +238,7 @@ images tensor . \n *@par Inputs: *Input grads must be a 4-D tensor. Inputs include: *@li grads: A 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]. +The format must be NHWC. *@li boxes: A 2-D tensor of shape [num_boxes, 4]. The i-th row of the tensor specifies the coordinates of a box in the box_ind[i] image and is specified in normalized coordinates [y1, x1, y2, x2]. @@ -248,7 +254,8 @@ method: A string specifying the interpolation method. Only 'bilinear' is supported for now . \n *@par Outputs: -*y:A 4-D tensor of shape [batch, image_height, image_width, depth] . \n +*y:A 4-D tensor of shape [batch, image_height, image_width, depth]. The format +must be NHWC. \n *@attention Constraints: *Input grads must be a 4-D tensor . \n @@ -273,6 +280,7 @@ REG_OP(CropAndResizeGradImage) *@par Inputs: *Input x must be a 4-D tensor. Inputs include: *@li x: A 4-D float tensor of shape [batch_size, height, width, channels]. +The format must be NHWC. *@li size: A 1-D tensor of 2 elements containing the size of the glimpses to extract. The glimpse height must be specified first, following by the glimpse width. @@ -293,7 +301,7 @@ uniform_noise . \n *@par Outputs: *y:A tensor representing the glimpses [batch_size, glimpse_height, -glimpse_width, channels] . \n +glimpse_width, channels]. The format must be NHWC. \n *@attention Constraints: *Input x must be a 4-D tensor . \n @@ -340,7 +348,8 @@ REG_OP(HSVToRGB) *@par Inputs: *Input images must be a 4-D tensor. Inputs include: -*@li images: 4-D with shape [batch, height, width, channels]. +*@li images: 4-D with shape [batch, height, width, channels]. The format must +be NHWC. *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new size for the images. *@li min: A Tensor of type float. @@ -354,6 +363,7 @@ the values at the corner pixels. Defaults to false. *@par Outputs: *@li resized_images: 4-D with shape [batch, new_height, new_width, channels]. +The format must be NHWC. *@li y_min: A Tensor of type float. *@li y_max: A Tensor of type float . \n @@ -381,7 +391,8 @@ REG_OP(QuantizedResizeBilinear) *@par Inputs: *Input images must be a 4-D tensor. Inputs include: -*@li images: 4-D with shape [batch, height, width, channels]. +*@li images: 4-D with shape [batch, height, width, channels]. The format must +be NHWC. *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new size for the images . \n @@ -391,7 +402,8 @@ output tensors are aligned, preserving the values at the corner pixels. Defaults to false . \n *@par Outputs: -*y: 4-D with shape [batch, new_height, new_width, channels] . \n +*y: 4-D with shape [batch, new_height, new_width, channels]. The format must +be NHWC. \n *@attention Constraints: *Input images can be of different types but output images are always float . \n @@ -414,10 +426,10 @@ REG_OP(ResizeArea) *@par Inputs: *Input grads must be a 4-D tensor. Inputs include: *@li grads: A Tensor of type float. 4-D with shape [batch, height, width, -channels]. +channels]. The format must be NHWC. *@li original_image: A Tensor. Must be one of the following types: float, double. 4-D with shape [batch, orig_height, orig_width, channels], The image -tensor that was resized . \n +tensor that was resized. The format must be NHWC. \n *@par Attributes: *@li align_corners: An optional bool. Defaults to False. If true, the centers @@ -426,10 +438,10 @@ false. *@li half_pixel_centers: An optional bool. Defaults to False . \n *@par Outputs: -*y: A Tensor. Has the same type as original_image . \n +*y: A Tensor. Has the same type as original_image. The format must be NHWC. \n *@attention Constraints: -*Input images can be of different types but output images are always float . \n +*Input images can be of different types but output images are always float . *@par Third-party framework compatibility *Compatible with tensorflow ResizeBicubicGrad operator. @@ -448,7 +460,8 @@ REG_OP(ResizeBicubicGrad) *@par Inputs: *Input images must be a 4-D tensor. Inputs include: -*@li images: 4-D with shape [batch, height, width, channels]. +*@li images: 4-D with shape [batch, height, width, channels]. The format +must be NHWC. *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new size for the images . \n @@ -459,10 +472,11 @@ Defaults to false. *@li half_pixel_centers: An optional bool. Defaults to False . \n *@par Outputs: -*y: 4-D with shape [batch, new_height, new_width, channels] . \n +*y: 4-D with shape [batch, new_height, new_width, channels]. The format +must be NHWC. \n *@attention Constraints: -*Input images can be of different types but output images are always float . \n +*Input images can be of different types but output images are always float . *@par Third-party framework compatibility *Compatible with tensorflow ResizeBicubic operator. @@ -483,7 +497,7 @@ REG_OP(ResizeBicubic) *@par Inputs: *Input grads must be a 4-D tensor. Inputs include: *@li grads: A Tensor. Must be one of the following types: uint8, int8, int32, -float16, float, double. 4-D with shape [batch, height, width, channels]. +float16, float, double. Must set the format, supported format list ["NCHW, NHWC"] *@li size: A 1-D int32 Tensor of 2 elements: orig_height, orig_width. The original input size . \n @@ -550,9 +564,8 @@ REG_OP(ResizeNearestNeighborV2GradD) *@par Inputs: *Input grads must be a 4-D tensor. Inputs include: -*@li grads: A Tensor of type float32. 4-D with shape [batch, height, width, -channels]. -*@li original_image: A Tensor. 4-D with shape [batch, orig_height, orig_width, +*@li grads: A Tensor of type float32. Must set the format, supported format list ["NCHW, NHWC"] +*@li original_image: A Tensor. 4-D shape. Must set the format, supported format list ["NCHW, NHWC"] channels], The image tensor that was resized . \n *@par Attributes: @@ -583,7 +596,7 @@ REG_OP(ResizeBilinearV2Grad) *@par Inputs: *Input images must be a 4-D tensor. Inputs include: -*@li x: 4-D with shape [batch, height, width, channels]. +*@li x: 4-D tensor. Must set the format, supported format list ["NCHW, NHWC"] *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new size for the images . \n @@ -697,7 +710,7 @@ REG_OP(SampleDistortedBoundingBoxExt2) *@par Inputs: *Input x must be a 4-D tensor. Inputs include: -*@li x: 4-D with shape [batch, height, width, channels]. +*@li x: 4-D tensor. Must set the format, supported format list ["NCHW, NHWC"]. *@li size: A 1-D int32 Tensor of 2 elements: new_height, new_width. The new size for the images . \n @@ -729,12 +742,12 @@ REG_OP(ResizeNearestNeighborV2) *@par Inputs: *Input images must be a 4-D tensor. Inputs include: *@li images: A Tensor. Must be one of the following types: float. 4-D with -shape [batch, height, width, depth]. A batch of images. +shape [batch, height, width, depth]. A batch of images. The format must be NHWC. *@li boxes: A Tensor of type float32. 3-D with shape [batch, num_bounding_boxes, 4] containing bounding boxes . \n *@par Outputs: -*A Tensor. Has the same type as images . \n +*A Tensor. Has the same type as images. The format must be NHWC. \n *@attention Constraints: *Input images must be a 4-D tensor . \n @@ -1342,6 +1355,66 @@ REG_OP(SpatialTransformerD) .ATTR(use_default_theta, ListBool, {}) .OP_END_FACTORY_REG(SpatialTransformerD) +/** +* @brief Resize the input tensor. \n +currently, only support resize image tensor using nearest neighbor and linear interpolation. + +* @par Inputs: +* Input x must be a 4-D tensor. Inputs include: \n +* @li x: A Tensor. Must be one of the following types: uint8, int8, int16, \n +int32, int64, float16, float, double. 4-D with shape [batch, height, width, channels] \n +or shape [batch, channels, height, width]. +* @li roi: A 1-D float Tensor. only takes effect when attr coordinate_transformation_mode \n +is "tf_crop_and_resize" +* @li scales: A 1-D float Tensor, the scale array along each dimension, Only one of \n +'scales' and 'sizes' can be specified. +* @li sizes: A 1-D int64 Tensor, The size of the output tensor. nly one of \n +'scales' and 'sizes' can be specified. If 'size' is specified, then set scales \n +to empty data (zero shape) in this operator's input list. + +* @par Attributes: +* @li coordinate_transformation_mode: String. Defaults to half_pixel. how to transform \n +the coordinate in the resized tensor to the coordinate in the original tensor. \n +other optional: pytorch_half_pixel, align_corners, asymmetric, tf_half_pixel_for_nn, \n +tf_crop_and_resize. +* @li cubic_coeff_a: Float. Defaults to -0.75, only used in cubic interpolation. \n +other optional: -0.5 +* @li exclude_outside: Int. Defaults to 0, If set to 1, the weight of sampling \n +locations outside the tensor will be set to 0 and the weight will be renormalized \n +so that their sum is 1.0. +* @li extrapolation_value: Float. Defaults to 0.0f. When coordinate_transformation_mode \n +is "tf_crop_and_resize" and x_original is outside the range [0, length_original - 1], \n +this value is used as the corresponding output value. +* @li mode: String. Defaults to nearest. Three interpolation modes: nearest (default), \n +linear and cubic. +* @li nearest_mode: String. Defaults to round_prefer_floor. Four modes: round_prefer_floor, \n +round_prefer_ceil, floor, ceil. Only used by nearest interpolation. + +* @par Outputs: +* y: A Tensor. Has the same type as x. + +* @attention Constraints: \n +* Input x must be a 4-D tensor. + +* @par Third-party framework compatibility +* Compatible with tensorflow ResizeNearestNeighborV2 operator. +*/ + +REG_OP(Resize) + .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, + DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(roi, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(scales, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(sizes, TensorType({DT_INT64})) + .OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, + DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .ATTR(coordinate_transformation_mode, String, "half_pixel") + .ATTR(cubic_coeff_a, Float, -0.75) + .ATTR(exclude_outside, Int, 0) + .ATTR(extrapolation_value, Float, 0) + .ATTR(mode, String, "nearest") + .ATTR(nearest_mode, String, "round_prefer_floor") + .OP_END_FACTORY_REG(Resize) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/internal_ops.h b/third_party/fwkacllib/inc/ops/internal_ops.h index 9dde14a5..bcc3f1c3 100644 --- a/third_party/fwkacllib/inc/ops/internal_ops.h +++ b/third_party/fwkacllib/inc/ops/internal_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h index 7a6fbc59..d8f45c5d 100644 --- a/third_party/fwkacllib/inc/ops/linalg_ops.h +++ b/third_party/fwkacllib/inc/ops/linalg_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/logging_ops.h b/third_party/fwkacllib/inc/ops/logging_ops.h index bc8ae2b8..03be7757 100644 --- a/third_party/fwkacllib/inc/ops/logging_ops.h +++ b/third_party/fwkacllib/inc/ops/logging_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/lookup_ops.h b/third_party/fwkacllib/inc/ops/lookup_ops.h index b37ab048..5d928e5a 100644 --- a/third_party/fwkacllib/inc/ops/lookup_ops.h +++ b/third_party/fwkacllib/inc/ops/lookup_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 149e0e37..e8dbd812 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -366,6 +366,27 @@ REG_OP(GetNext) .OP_END_FACTORY_REG(GetNext) /** +*@brief Get dynamic dims after GetNext. \n + +*@par Inputs: +*input: A nested structure of Tensor objects, from GetNext's output. \n + +*@par Attributes: +*@li shape_info: GE shape_info for each inputs, -1 means unknow dim. +*@li N: Inputs number. \n + +*@par Outputs: +*dims: GE unknow dims, a vector of int64. \n +*/ + +REG_OP(GetDynamicDims) + .DYNAMIC_INPUT(input, TensorType({DT_INT32, DT_INT64})) + .OUTPUT(dims, TensorType({DT_INT64})) + .REQUIRED_ATTR(shape_info, ListInt) + .REQUIRED_ATTR(N, Int) + .OP_END_FACTORY_REG(GetDynamicDims) + +/** *@brief End of sequence . \n *@par Inputs: @@ -710,6 +731,9 @@ REG_OP(IFMR) *@par Third-party framework compatibility *Compatible with mindspore + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(WtsARQ) @@ -741,6 +765,9 @@ REG_OP(WtsARQ) *@par Third-party framework compatibility *Compatible with mindspore + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ActsULQ) @@ -768,6 +795,9 @@ REG_OP(ActsULQ) *@par Third-party framework compatibility *Compatible with mindspore + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ActsULQInputGrad) @@ -790,6 +820,9 @@ REG_OP(ActsULQInputGrad) *@par Third-party framework compatibility *Compatible with mindspore + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ActULQClampMaxGrad) @@ -812,6 +845,9 @@ REG_OP(ActULQClampMaxGrad) *@par Third-party framework compatibility *Compatible with mindspore + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ActULQClampMinGrad) @@ -821,6 +857,33 @@ REG_OP(ActULQClampMinGrad) .OUTPUT(clamp_min_grad, TensorType({DT_FLOAT16, DT_FLOAT})) .OP_END_FACTORY_REG(ActULQClampMinGrad) +/** +* @brief Computes Lp norm. + +* @par Inputs: +* @li x: An ND tensor of type float16, float32. \n +* +* @par Attributes: +* @li p: Int, "inf" or "-inf", default value is 2. +* @li axes: ListInt, {} means all axes will be computed. +* @li keepdim: Bool, default is false. +* @li epsilon: Float, default is 1e-12. \n + +* @par Outputs: +* @li y: An ND tensor of type float16, float32. The shape of y is depending +* on axes and keepdim. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator LpNorm. +*/ +REG_OP(LpNorm) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(p, Int, 2) + .ATTR(axes, ListInt, {}) + .ATTR(keepdim, Bool, false) + .ATTR(epsilon, Float, 1e-12) + .OP_END_FACTORY_REG(LpNorm) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index ed23d3f6..33b596d8 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,8 +38,8 @@ namespace ge { * float32, int32. Has format [ND, NHWC] . \n *@par Attributes: -*@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. -*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n +*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. +*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n *@par Outputs: *y: The result matrix Tensor. 2D. Must be one of the following types: float16, @@ -70,8 +70,8 @@ REG_OP(MatMul) * float32, int32. Has format [ND, NHWC] . \n *@par Attributes: -*@li transpose_a: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. -*@li transpose_b: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n +*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. +*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n *@par Outputs: *y: The result matrix Tensor. 2D. Must be one of the following types: float16, @@ -156,8 +156,8 @@ REG_OP(GEMM) * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n *@par Attributes: -*@li adj_x: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. -*@li adj_y: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n +*@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. +*@li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n *@par Outputs: *y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, @@ -175,6 +175,41 @@ REG_OP(BatchMatMul) .ATTR(adj_x2, Bool, false) .OP_END_FACTORY_REG(BatchMatMul) + +/** +* @brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n + +* @par Inputs: +* Three inputs, including: +* @li x1: A matrix Tensor. Must be one of the following types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. +* @li x2: A matrix Tensor. Must be one of the following types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n +* @li bias: A matrix Tensor. Must be one of the following types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n + +* @par Attributes: +* @li adj_x: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. +* @li adj_y: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n + +* @par Outputs: +* y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2" . \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchMatmul. +*/ + +REG_OP(BatchMatMulV2) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .ATTR(adj_x1, Bool, false) + .ATTR(adj_x2, Bool, false) + .OP_END_FACTORY_REG(BatchMatMulV2) + + /** *@brief Computes half the L2 norm of a tensor without the sqrt . \n @@ -979,6 +1014,14 @@ REG_OP(MatrixDiagV2) .OUTPUT(output, TensorType::BasicType()) .OP_END_FACTORY_REG(MatrixDiagV2) +REG_OP(IndexAdd) + .INPUT(var, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(updates, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .OUTPUT(var_out, TensorType({DT_INT32, DT_INT8, DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .ATTR(axis, Int, 0) + .OP_END_FACTORY_REG(IndexAdd) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h index 0c6a5dff..a35cee03 100644 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index 35296870..5ec9d252 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -617,8 +617,7 @@ REG_OP(Conv2DBackpropFilterD) * (top, bottom, left, right) side of the input. *@li dilations: Optional. A list of 4 integers. The dilation factor for each * dimension of input. The dimension order is determined by the data format of -* "x". The N and C dimensions must be set to 1. The H and W dimensions must be -* set to 1 for int8 type. Defaults to [1, 1, 1, 1]. +* "x". The N and C dimensions must be set to 1. Defaults to [1, 1, 1, 1]. *@li groups: Optional. An integer of type int32. The number of blocked * connections from input channels to output channels. In_channels and * out_channels must both be divisible by "groups". Defaults to 1. @@ -652,6 +651,8 @@ REG_OP(Conv2DBackpropFilterD) | Offset_x | | [-128, 127] @endverbatim +* The W dimension of the input image supports cases exceeding 4096, but it may +* cause compilation errors. *\n * *@par Outputs: @@ -666,21 +667,6 @@ REG_OP(Conv2DBackpropFilterD) * out_width = (in_width + pad_left + pad_right - * (dilation_w * (filter_width - 1) + 1)) * / stride_w + 1 -* -*@attention Constraints: -*@li The following restrictions on the output must be met: -*@verbatim - | Output | Restrictions - ----------|-------------------------------- - | H == 1 | H * W(input) == H * W(filter) - | W == 1 | - ----------|-------------------------------- - | H != 1 | W(input) == W(filter) - | W == 1 | Only for Ascend310 Hi3796V300CS -@endverbatim -* "H * W (input)" indicates the image size after padding and "H * W (filter)" -* indicates the filter size after dilation."W(input)" and W(filter) indicate -* the same rule on the W dimension. *\n * *@par Quantization supported or not @@ -778,7 +764,7 @@ REG_OP(Conv2DCompress) * With the format "HWCN" , the data is stored in the order of: [filter_height, * filter_width, in_channels / groups, out_channels]. *@li offsets: A 4D tensor of x-y coordinates offset and mask. With the format -* "NHWC", the data is stored in the order of: [batch, in_height, in_width, +* "NHWC", the data is stored in the order of: [batch, out_height, out_width, * deformable_groups * filter_height * filter_width * 3]. *@li bias: An optional 1D tensor of additive biases to the filter outputs. * The data is stored in the order of: [out_channels]. @@ -822,25 +808,12 @@ REG_OP(Conv2DCompress) *@verbatim | Name | Field | Scope --------------------|--------|---------------------------- - | Input Image Size | H | [1, 100000] - | | W | [1, 4096] - --------------------|--------|---------------------------- - | Filter Size | H | [1, 255] - | | W | [1, 255] + | Input Image Size | H | [1, 100000 / filter_height] + | | W | [1, 4096 / filter_width] --------------------|--------|---------------------------- - | Stride | H | [1, 63] + | Filter Size | H | [1, 63] | | W | [1, 63] - --------------------|--------|---------------------------- - | Padding | Top | [0, 255] - | | Bottom | [0, 255] - | | Left | [0, 255] - | | Right | [0, 255] - ------------ -------|--------|---------------------------- - | Dilation | H | [1, 255] - | | W | [1, 255] @endverbatim -* "W(input)" indicate the image width after padding and W(filter) indicates the -* filter width after dilation. *\n * *@par Outputs: @@ -855,21 +828,7 @@ REG_OP(Conv2DCompress) * out_width = (in_width + pad_left + pad_right - * (dilation_w * (filter_width - 1) + 1)) * / stride_w + 1 -* -*@attention Constraints: -*@li The following restrictions on the output must be met: -*@verbatim - | Output | Restrictions - ----------|-------------------------------- - | H == 1 | H * W(input) == H * W(filter) - | W == 1 | - ----------|-------------------------------- - | H != 1 | W(input) == W(filter) - | W == 1 | Only for Ascend310 Hi3796V300CS -@endverbatim -* "H * W(input)" indicates the image size after padding and "H * W(filter)" -* indicates the filter size after dilation. "W(input)" and W(filter) indicate -* the same rule on the W dimension. +*\n * *@par Quantization supported or not *@li No @@ -1405,13 +1364,13 @@ REG_OP(Conv2DTransposeD) /** *@brief Computes the deformed convolution output with the expected input *@par Inputs: - * Four inputs: + * Two inputs: * @li x: A Tensor of type float16,float32 * @li offsets: A Tensor of type float16,float32.Deformation offset parameter. *@par Required Attributes: * @li strides: A tuple/list of 4 integers.The stride of the sliding window for * height and width for H/W dimension. - * @li pads: A tuple/list of 4 integers.Padding added to each dimension + * @li pads: A tuple/list of 4 integers.Padding added to H/W dimension * of the input. * @li ksize: A tuple/list of 2 integers.kernel size. *@par Attributes: @@ -1420,6 +1379,7 @@ REG_OP(Conv2DTransposeD) * of input. Defaults to [1, 1, 1, 1] * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. * @li deformable_groups: Specify the c-axis grouping number of input x. + * @li modulated: Specify version of DeformableConv2D, true means v2, false means v1 *@par Outputs: * y: A Tensor. A Tensor of type float16, float32. */ @@ -1433,7 +1393,69 @@ REG_OP(DeformableOffsets) .ATTR(dilations, ListInt, {1, 1, 1, 1}) .ATTR(data_format, String, "NCHW") .ATTR(deformable_groups, Int, 1) + .ATTR(modulated, Bool, true) .OP_END_FACTORY_REG(DeformableOffsets) +/** +*@brief Computes the gradients of DeformableOffsets with respect to input and offsets +*@par Inputs: + * Three inputs: + * @li grad: A Tensor of type float16,float32. gradients with respect to DeformableOffsets output + * @li x: A Tensor of type float16,float32. + * @li offsets: A Tensor of type float16,float32.Deformation offset parameter. +*@par Required Attributes: + * @li strides: A tuple/list of 4 integers.The stride of the sliding window for + * height and width for H/W dimension. + * @li pads: A tuple/list of 4 integers.Padding added to H/W dimension + * of the input. + * @li ksize: A tuple/list of 2 integers.kernel size. +*@par Attributes: + * Three attributes: + * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension + * of input. Defaults to [1, 1, 1, 1] + * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. + * @li deformable_groups: Specify the c-axis grouping number of input x. + * @li modulated: Specify version of DeformableConv2D, true means v2, false means v1. +*@par Outputs: + * grad_x: A Tensor of type float16, float32. Gradients with respect to input_x + * grad_offsets: A Tensor of type float16, float32. Gradients with respect to input_offsets +*/ +REG_OP(DeformableOffsetsGrad) + .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(grad_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(grad_offsets, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .REQUIRED_ATTR(ksize, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) + .ATTR(data_format, String, "NCHW") + .ATTR(deformable_groups, Int, 1) + .ATTR(modulated, Bool, true) + .OP_END_FACTORY_REG(DeformableOffsetsGrad) + +/** +*@brief Computes the deformed dilation output with the expected input +*@par Inputs: + * One inputs: + * @li x: A Tensor of type int8, float16, float32 +*@par Required Attributes: + * @li dilations: A tuple/list of integers. +*@par Attributes: + * Two attributes: + * @li padding_value: default value filling in blank + * @li pads: A tuple/list of integers. +*@par Outputs: + * y: A Tensor. A Tensor of type int8, float16, float32. +*/ +REG_OP(Dilation) + .INPUT(x, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_INT8, DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(dilations, ListInt) + .ATTR(pads, ListInt, {}) + .ATTR(padding_value, Float, 0.0) + .OP_END_FACTORY_REG(Dilation) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index a013fb33..20fdb90d 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1383,6 +1383,7 @@ REG_OP(DecodeWheelsTarget) *@attention Constraints: * Only computation of float16 data is supported. +* Note: when the class num per image * max_size_per_class is too big, will compile fail with ERROR-insufficient memory */ REG_OP(BatchMultiClassNonMaxSuppression) .INPUT(boxes, TensorType({DT_FLOAT16})) @@ -1495,6 +1496,13 @@ REG_OP(Sort) .ATTR(descending, Bool, false) .OP_END_FACTORY_REG(Sort) +REG_OP(PtIou) + .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(mode, String, "iou") + .OP_END_FACTORY_REG(PtIou) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 35c4c7d4..482bca40 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -978,6 +978,241 @@ REG_OP(InHost) .OUTPUT(variance_sqrt, TensorType({DT_FLOAT})) .ATTR(epsilon, Float, 0.00001) .OP_END_FACTORY_REG(InHost) + +/** +* @brief perform instance normalization to x. \n + +* @par Inputs: +* Three inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, format is NC1HWC0. +* @li gamma: A Tensor. Must be one of the following types: float16, float32, format is ND. +* @li beta: A Tensor. Must be one of the following types: float16, float32, format is ND. + +* @par Attributes: +* @li data_format: An attribute of type String \n +* @li epsilon: An attribute of type Float, . \n + +* @par Outputs: +* @li y: A Tensor. Has the same type as "x", format is NC1HWC0. \n +* @li mean: A Tensor. Has the same type as "x", format is NC1HWC0 and the shape is [N, C1, 1, 1, C0]. \n +* @li variance: A Tensor. Has the same type as "x", format is NC1HWC0 and the shape is [N, C1, 1, 1, C0]. \n + +* @par Third-party framework compatibility +* Can be used by onnx InstanceNormalization +*/ +REG_OP(InstanceNorm) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(gamma, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(beta, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(variance, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(data_format, String) + .REQUIRED_ATTR(epsilon, Float) + .OP_END_FACTORY_REG(InstanceNorm) + +REG_OP(KlDivLossGrad) + .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(reduction, String, "mean") + .ATTR(log_target, Bool, false) + .OP_END_FACTORY_REG(KlDivLossGrad) + +/** +* @brief Computes l1_loss_grad or l1_loss_backward. \n + +* @par Inputs: +* Three inputs, including: +* @li grads: A Tensor. Must be one of the following types: float16, float32. +* Required. +* @li predict: A Tensor. Has the same type as "grads". Required. +* @li label: A Tensor. Has the same type as "grads". Required. \n + +* @par Attributes: +* @li reduction: An optional attribute of type String. Defaults to "mean". \n + +* @par Outputs: +* @li y: A Tensor. Has the same type as "x". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator L1LossGrad. +*/ +REG_OP(L1LossGrad) + .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(L1LossGrad) + +/** +* @brief Computes loss of lp, p=1,2,3.... + +* @par Inputs: +* @li predict: An ND tensor of type float16, float32. +* @li label: An ND tensor of type float16, float32. \n + +* @par Attributes: +* @li p: A required int attribute that decides which loss to compute, now the p only can be 1 to compute l1_loss. +* @li reduction: An optional string.Defaults to "mean". \n + +* @par Outputs: +* @li y: An ND tensor tensor with the same shape and type as "predict". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator LpLoss. +*/ +REG_OP(LpLoss) + .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(p, Int) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(LpLoss) + +/** +* @brief Computes gradients of mse loss. + +* @par Inputs: +* @li predict: An ND tensor of type float16, float32. +* @li label: An ND tensor of type float16, float32. +* @li dout: An ND tensor of type float16, float32. \n + +* @par Attributes: +* @li reduction: An optional string.Defaults to "mean". \n + +* @par Outputs: +* @li y: An ND tensor tensor with the same shape and type as "predict". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator MseLossGrad. +*/ +REG_OP(MseLossGrad) + .INPUT(predict, TensorType({DT_FLOAT32, DT_FLOAT16})) + .INPUT(label, TensorType({DT_FLOAT32, DT_FLOAT16})) + .INPUT(dout, TensorType({DT_FLOAT32, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT32, DT_FLOAT16})) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(MseLossGrad) + +/** +* @brief Computes mse loss. +* @par Inputs: +* two inputs, including: +* @li predict: An ND Tensor of dtype float16 or float32. +* @li label: An ND Tensor of dtype float16 or float32.\n +* +* @par Attributes: +* @li reduction:An optional str from sum, none, mean, Defaults to "mean".\n +* +* @par Outputs: +* @li y: when reduction=sum/mean, y is scale. when reduction=none, y has +* same type and shape as "predict".\n +*/ +REG_OP(MseLoss) + .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(MseLoss) + +/** +* @brief Calculates the reversed outputs of the function "smooth_l1_loss_v2". \n + +* @par Inputs: +* Three Inputs, including: +* @li predict: A Tensor. Must be one of the following types: +* float16, float32. +* @li label: A Tensor. Has the same type as "predict". +* @li dout: A Tensor. Has the same type as "predict". \n + +* @par Attributes: +* Two Attributes, including: +* @li sigma: An optional float. Defaults to 1.0. \n + +* @li reduction: An optional string. Defaults to "mean", +* Must be one of the following: "none", "mean", "sum". \n + +* @par Outputs: +* @li gradient: A Tensor. Has the same type as "predict". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator SmoothL1LossBackward. +*/ +REG_OP(SmoothL1LossGradV2) + .INPUT(predict, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(label, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(dout, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(gradient, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(sigma, Float, 1.0) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(SmoothL1LossGradV2) + +/** +* @brief Creates a criterion that uses a squared term if the absolute +* element-wise error falls below beta and an L1 term otherwise. It is +* less sensitive to outliers than the MSELoss and in some cases prevents +* exploding gradients. + +* @par Inputs: +* @li predict: A multi-dimensional Tensor of type float16 or float32, +* specifying the predictive value. \n +* @li label: A multi-dimensional Tensor of type float16 or float32, +* specifying the target value. \n + +* @par Attributes: +* @li sigma: An optional int. Specifies the threshold of loss. Defaults +* to "1.0". \n +* @li reduction: An optional str. Specifies the reduction to apply to +* the output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied, +* 'mean': the sum of the output will be divided by the number of elements in +* the output,'sum': the output will be summed. Default: 'mean'. \n + +* @par Outputs: +* @li loss: Indicates the loss between the predictive value and target value. +* Has the same dimensions as "predict". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator smooth_l1_loss. \n +*/ +REG_OP(SmoothL1LossV2) + .INPUT(predict, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(label, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(loss, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .ATTR(sigma, Float, 1.0) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(SmoothL1LossV2) + +/** +* @brief Computes gradients of sigmoid_cross_entropy_with_logits_v2. + +* @par Inputs: +* @li predict: An ND tensor of type float16, float32. +* @li target: An ND tensor of type float16, float32. +* @li dout: An ND tensor of type float16, float32. +* @li weight: An optional ND tensor of type float16, float32. +* @li pos_weight: An optional ND tensor of type float16, float32. \n + +* @par Attributes: +* @li reduction: An optional string.Defaults to "mean". \n + +* @par Outputs: +* @li gradient: An ND tensor tensor with the same shape and type as "predict". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator SigmoidCrossEntropyWithLogitsGrad. +*/ +REG_OP(SigmoidCrossEntropyWithLogitsGradV2) + .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(pos_weight, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h index 9edc469a..16552eee 100644 --- a/third_party/fwkacllib/inc/ops/nn_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index ab35ba47..f16a59df 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -182,6 +182,48 @@ REG_OP(AvgPool3D) .ATTR(data_format, String, "NDHWC") .OP_END_FACTORY_REG(AvgPool3D) + +/** +*@brief Performs average pooling on the input. + +*@par Inputs: +*@li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. +*@li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout. +*@li multiplier: An optional tensor of float16, float32, double. + +*@par Attributes: +*@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. +*@li strides:List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. +*@li pads: List of ints, implicit zero paddings on both sides of the input. +*@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. +*@li count_include_pad: When true, will include the zero-padding in the averaging calculation. +*@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. +*@li data_format: A string, format of input data . \n + +*@par Outputs: +*y: The average pooled output tensor . \n + +*@attention Constraints: +*@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator AvgPool3D. +*/ +REG_OP(AvgPool3DD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OPTIONAL_INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OPTIONAL_INPUT(multiplier, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(ceil_mode, Bool, false) + .ATTR(count_include_pad, Bool, true) + .ATTR(divisor_override, Int, 0) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(AvgPool3DD) + + /** *@brief Performs max_pool_ext2 on the input . \n @@ -477,8 +519,9 @@ REG_OP(MaxPoolV2) *@par Inputs: * One input: -*x: An NC1HWC0 Tensor. Supported type: float, double, int32, - * uint8, int16, int8, int64, uint16, half, uint32, uint64 . \n +*x: An 4D Tensor. Supported type: float, double, int32, + * uint8, int16, int8, int64, uint16, half, uint32, uint64. + * Must set the format, supported format list ["NCHW, NHWC"]. \n *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, @@ -517,10 +560,12 @@ REG_OP(MaxPoolWithArgmax) *@par Inputs: * Three inputs, including: -*@li x: An NC1HWC0 tensor. Supported type: float, double, int32, +*@li x: An 4d tensor. Supported type: float, double, int32, * uint8, int16, int8, int64, uint16, half, uint32, uint64. -*@li grad: An NC1HWC0 tensor. Supported type: float, double, int32, + * Must set the format, supported format list ["NCHW, NHWC"] +*@li grad: An 4d tensor. Supported type: float, double, int32, * uint8, int16, int8, int64, uint16, half, uint32, uint64. + * Must set the format, supported format list ["NCHW, NHWC"] *@li argmx: An NC1HWC0 tensor of type int32 or int64 . \n *@par Attributes: @@ -1107,7 +1152,7 @@ REG_OP(AvgPool1DD) *@par Inputs: * One input: -*x: An NC1HWC0 Tensor of type float16. +*x: An 4d Tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"]. *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for * each dimension of the input tensor. No default value. @@ -1148,9 +1193,9 @@ REG_OP(MaxPoolWithArgmaxV2) *@par Inputs: * Three inputs, including: -*@li x: An NC1HWC0 tensor of type float16. -*@li grad: An NC1HWC0 tensor of type float16. -*@li argmx: An NC1HWC0 tensor of type uint16 or int64 . \n +*@li x: An 4d tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"] +*@li grad: An 4d tensor of type float16. Must set the format, supported format list ["NCHW, NHWC"] +*@li argmx: An 4d tensor of type uint16 or int64. Must set the format, supported format list ["NCHW, NHWC"] \n *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for @@ -1291,5 +1336,167 @@ REG_OP(MaxPoolV3Grad) .ATTR(global_pooling, Bool, false) .ATTR(ceil_mode, Bool, false) .OP_END_FACTORY_REG(MaxPoolV3Grad) + +/** +*@brief Performs dilation2d on the input . \n + +*@par Inputs: +*x: A tensor of shape is 4d, format is support NHWC. +*filter: A tensor of shape is 3d, the type is same with x, +and the c dimension is same with x. \n + +*@par Attributes: +*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. +*@li rates: A required list of 4 ints. The rates of the N and C dimensions are 1. +*@li padding_mode: A optional string. Defaults to "SAME", it support SAME and VALID. +*@li pads: A optional list of 4 ints. \n + +*@par Outputs: +*y: The output tensor. Has the same type and format as input "x" . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Dilation2D. +*/ +REG_OP(Dilation2D) + .INPUT(x,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .INPUT(filter,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .OUTPUT(y,TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, DT_INT8, DT_UINT16})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(rates, ListInt) + .ATTR(padding_mode, String, "SAME") + .ATTR(pads, ListInt, {0,0,0,0}) + .OP_END_FACTORY_REG(Dilation2D) + +/** +* @brief Applies a 2D adaptive average pooling over +* an input signal composed of several input planes. \n + +* @par Inputs: +* One input, including: +* @li x: A Tensor. Must be one of the following data types: +* float16, float32. \n + +* @par Attributes: +* @li output_size: A required list of 2 ints +* specifying the size (H,W) of the output tensor. \n + +* @par Outputs: +* @li y: A Tensor. Has the same data type as "x" \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator AdaptiveAvgPool2d. +*/ +REG_OP(AdaptiveAvgPool2d) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(output_size, ListInt) + .OP_END_FACTORY_REG(AdaptiveAvgPool2d) + +/** +* @brief Compute gradients of adaptive averagev2 pooling function. + +* @par Inputs: +* @li input_grad: A NCHW Tensor. Must be one of the following data types: +* float16, float32. + +* @par Attributes: +* @li orig_input_shape: A required tuple or list of type int32. + +* @par Outputs: +* @li output_grad: A tensor with the same shape and type as "orig_input_shape". + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator AdaptiveAvgPool2dGrad. +*/ +REG_OP(AdaptiveAvgPool2dGrad) + .INPUT(input_grad, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(output_grad, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(orig_input_shape, ListInt) + .OP_END_FACTORY_REG(AdaptiveAvgPool2dGrad) + +/** +* @brief Performs the backpropagation of MaxPoolWithGradArgmaxV1. + +* @par Inputs: +* Three inputs, including: +* @li x: An NC1HWC0 tensor of type float16. +* @li grad: An NC1HWC0 tensor of type float16. +* @li argmax: An NC1HWC0 tensor of type uint16 or int64. \n + +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for +* each dimension of the input tensor. No default value. +* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for +* each dimension of the input tensor. No default value. +* @li pads: A required listint. \n + +* @par Outputs: +* y: A Tensor. Has the same type and format as input "x". \n + +* @attention Constraints: +* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. +* @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1 +* @li "pads" is listint. +* @li "ceil_mode" defaults to False. +* @li "data_format" defaults to "NC1HWC0". \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV1. +*/ + +REG_OP(MaxPoolGradWithArgmaxV1) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(grad, TensorType({DT_FLOAT16})) + .INPUT(argmax, TensorType({DT_UINT16})) + .OUTPUT(y, TensorType({DT_FLOAT16})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dtype, Int, 3) + .ATTR(dilation, ListInt, {1, 1, 1, 1}) + .ATTR(ceil_mode, Bool, false) + .OP_END_FACTORY_REG(MaxPoolGradWithArgmaxV1) + +/** +* @brief Performs max pooling on the input and outputs both max values and indices. + +* @par Inputs: +* One input: +* x: An NC1HWC0 Tensor of type float16. \n + +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for +* each dimension of the input tensor. No default value. +* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for +* each dimension of the input tensor. No default value. +* @li pads: A required string. No default value. \n + +* @par Outputs: +* y: A Tensor. Has the same type and format as input "x". +* argmax: A Tensor. type:uint16, format:NC1HWC0. \n + +* @attention Constraints: +* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. +* @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, +* strides[2] <= 63, strides[2] >= 1. +* @li "pads" is listint. +* @li "ceil_mode" defaults to False. +* @li "data_format" defaults to "NC1HWC0". \n + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolWithArgmaxV1. +*/ +REG_OP(MaxPoolWithArgmaxV1) + .INPUT(x, TensorType({DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT16})) + .OUTPUT(argmax, TensorType({DT_UINT16})) + .REQUIRED_ATTR(ksize, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dtype, Int, 3) + .ATTR(dilation, ListInt, {1, 1, 1, 1}) + .ATTR(ceil_mode, Bool, false) + .OP_END_FACTORY_REG(MaxPoolWithArgmaxV1) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 047fd6da..92074872 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/no_op.h b/third_party/fwkacllib/inc/ops/no_op.h index 7834591c..b27b1fa0 100644 --- a/third_party/fwkacllib/inc/ops/no_op.h +++ b/third_party/fwkacllib/inc/ops/no_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index e0e5dfc6..e0897280 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -640,6 +640,208 @@ REG_OP(Mish) .OUTPUT(y, TensorType({ DT_FLOAT,DT_FLOAT16 })) .OP_END_FACTORY_REG(Mish) +/** + * @brief pytorch hardtanh_backward operator. + * + * @par Inputs: + * 2 inputs, including: + * @li result, minimum tensor of the linear region range, + * datatype: float16/float32, format:ND/5HD. + * @li grad, maximum tensor of the linear region range, + * datatype:float16/float32, format:ND/5HD. \n + + * @par Attributes: + * 2 attributes, including: + * @li min_val, minimum value of the linear region range, datatype:float. + * @li max_val, maximum value of the linear region range, datatype:float. \n + + * @par Outputs: + * 1 output, including: + * @li y, hardtanh_backward output tensor, datatype and format is same as + * input result. \n + + * @attention Constraints: + * This operator only supports dataType: float16/float32, format: ND/5HD. \n + + * @par Third-party framework compatibility + * Compatible with the Pytorch operator HardtanhGrad. + */ +REG_OP(HardtanhGrad) + .INPUT(result, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "First operand." */ + .INPUT(grad, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Second operand." */ + .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) /* "Result, has same element type as two inputs" */ + .ATTR(min_val, Float, -1.0) + .ATTR(max_val, Float, 1.0) + .OP_END_FACTORY_REG(HardtanhGrad) + +/** +* @brief Calculates the softplus loss function with attributes of beta and threshold. \n + +* @par Inputs: +* One inputs, including: +* @li x: A mutable Tensor. Must be one of the following types: +* float16, float32. \n + +* @par Attributes: +* @li beta: An optional float. Defaults to "1.0" \n + +* @li threshold: An optional float. Defaults to "20.0" \n + +* @par Outputs: +* @li y: A mutable Tensor. Has the same type as "x" \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Softplus. +*/ +REG_OP(SoftplusV2) + .INPUT(x, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(y, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .ATTR(beta, Float, 1.0) + .ATTR(threshold, Float, 20.0) + .OP_END_FACTORY_REG(SoftplusV2) + +/** +* @brief Calculates the reversed outputs of the function "softplus_v2". \n + +* @par Inputs: +* Two inputs, including: +* @li input_gradients: A mutable Tensor. Must be one of the following types: +* float16, float32. +* @li input_features: A mutable Tensor of the same type as "input_gradients" \n + +* @par Attributes: +* @li beta: An optional float. Defaults to "1.0" \n + +* @li threshold: An optional float. Defaults to "20.0" \n + +* @par Outputs: +* @li output_backprops: A mutable Tensor. Has the same type as "input_gradients" \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator SoftplusGrad. +*/ +REG_OP(SoftplusV2Grad) + .INPUT(input_gradients, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .INPUT(input_features, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .OUTPUT(output_backprops, TensorType({ DT_FLOAT, DT_FLOAT16 })) + .ATTR(beta, Float, 1.0) + .ATTR(threshold, Float, 20.0) + .OP_END_FACTORY_REG(SoftplusV2Grad) + +/** + * @brief ThresholdedRelu takes one input data (Tensor) and produces one output data (Tensor) + * where the rectified linear function, y = x for x > alpha, y = 0 otherwise, is applied to the tensor elementwise. + * + * @par inputs + * one input including: + * @li x: input A Tensor. Must be one of the following types: float32, float16 + * + * @par output + * one output including: + * @li y:A Tensor of the same type as x + * + */ +REG_OP(ThresholdedRelu) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(alpha, Float, 1.0) + .OP_END_FACTORY_REG(ThresholdedRelu) + +/** +* @brief Calculate the hard shrinkage function. \n + +* @par Inputs: +* One inputs, including: +* @li input_x: A tensor. Must be one of the following types: +* float16, float32. \n + +* @par Attributes: +* @li lambd: An optional float. Defaults to 0.5. \n + +* @par Outputs: +* y: A Tensor with the same dtype and shape of input_x's. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Hardshrink. \n +*/ +REG_OP(HardShrink) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(lambd, Float, 0.5) + .OP_END_FACTORY_REG(HardShrink) + +/** +* @brief Calculate the hard sigmoid function. \n + +* @par Inputs: +* One inputs, including: +* @li input_x: A tensor. Must be one of the following types: +* float16, float32, int32. \n + +* @par Attributes: +* @li alpha: An optional float. Defaults to 0.16666666. \n +* @li beta: An optional float. Defaults to 0.5. \n + +* @par Outputs: +* y: A Tensor with the same dtype and shape of input_x's. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Hardsigmoid. \n +*/ +REG_OP(HardSigmoid) + .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(alpha, Float, 0.16666666) + .ATTR(beta, Float, 0.5) + .OP_END_FACTORY_REG(HardSigmoid) + +/** +* @brief Calculate the soft shrinkage function. \n + +* @par Inputs: +* One inputs, including: +* @li input_x: A tensor. Must be one of the following types: +* float16, float32. \n + +* @par Attributes: +* @li lambd: An optional float. Defaults to 0.5. \n + +* @par Outputs: +* y: A Tensor with the same dtype and shape of input_x's. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator Softshrink. \n +*/ +REG_OP(SoftShrink) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(lambd, Float, 0.5) + .OP_END_FACTORY_REG(SoftShrink) + +/** +* @brief Calculate the reversed outputs of the function "soft_shrink". \n + +* @par Inputs: +* Two inputs, including: +* @li input_grad: A tensor. Must be one of the following types: +* float16, float32. \n +* @li input_x: A tensor of the same dtype as "input_grad". \n + +* @par Attributes: +* @li lambd: An optional float. Defaults to 0.5. \n + +* @par Outputs: +* y: A Tensor of the same dtype and shape as "input_graxd". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator SoftShrinkGrad. \n +*/ +REG_OP(SoftShrinkGrad) + .INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(lambd, Float, 0.5) + .OP_END_FACTORY_REG(SoftShrinkGrad) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h index 8d7ef9f9..f36d2935 100644 --- a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h +++ b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/outfeed_ops.h b/third_party/fwkacllib/inc/ops/outfeed_ops.h index e0b783bc..53b9d701 100644 --- a/third_party/fwkacllib/inc/ops/outfeed_ops.h +++ b/third_party/fwkacllib/inc/ops/outfeed_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index f746b3b3..8d71c5cd 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -161,7 +161,7 @@ REG_OP(Pad) *@brief Pads a tensor . \n *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n +*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n *@par Attributes: *paddings: An optional "vector>". Defaults to "{}". @@ -180,8 +180,8 @@ REG_OP(Pad) * Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. */ REG_OP(PadD) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) .REQUIRED_ATTR(paddings, ListListInt) .OP_END_FACTORY_REG(PadD) @@ -213,7 +213,7 @@ REG_OP(PadV2) *@brief Pads a tensor . \n *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32 . \n +*x: A Tensor. Must be one of the following types: float16, float32, int32 . \n *constant_values: A Tensor. Must have the same type as input. *@par Attributes: @@ -227,10 +227,7 @@ REG_OP(PadV2) *y: A Tensor of the same type as "x" . \n *@par Third-party framework compatibility: -* Compatible with TensorFlow operator Pad. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. +* Compatible with TensorFlow operator PadV2. */ REG_OP(PadV2D) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) @@ -403,5 +400,46 @@ REG_OP(EmbeddingRankId) .ATTR(mode, String, "mod") .OP_END_FACTORY_REG(EmbeddingRankId) +/** +* @brief Fill the value to a tensor has the specified shape. + +* @par Inputs: +* One inputs, including: +* @li dims: An Tensor, specify the shape that the value to fill. + +* @par Attributes: +* @li value: An optional float value. Defaults to 0.0. + +* @par Outputs: +* @li y: A Tensor. Has the shape specify by attr shape, and full of the value specify by attr value. + +* @par Third-party framework compatibility +* Compatible with the ONNX operator ConstantOfShape. +*/ +REG_OP(FillV2) + .INPUT(dims, TensorType({DT_INT16, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) + .ATTR(value, Float, 0) + .OP_END_FACTORY_REG(FillV2) + +/** +* @brief Fill the value to a tensor has the specified shape. + +* @par Attributes: +* @li value: An optional float value. Defaults to 0.0. + +* @li dims: An required listInt to specify the shape that the value to fill. + +* @par Outputs: +* @li y: A Tensor. Has the shape specify by attr shape, and full of the value specify by attr value. + +* @par Third-party framework compatibility +* Compatible with the ONNX operator ConstantOfShape. +*/ +REG_OP(FillV2D) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64})) + .ATTR(value, Float, 0) + .REQUIRED_ATTR(dims, ListInt) + .OP_END_FACTORY_REG(FillV2D) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/parsing_ops.h b/third_party/fwkacllib/inc/ops/parsing_ops.h index 5c7adfd8..9a5cf504 100644 --- a/third_party/fwkacllib/inc/ops/parsing_ops.h +++ b/third_party/fwkacllib/inc/ops/parsing_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h index b53cfeb6..806e28df 100644 --- a/third_party/fwkacllib/inc/ops/quantize_ops.h +++ b/third_party/fwkacllib/inc/ops/quantize_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/ragged_array_ops.h b/third_party/fwkacllib/inc/ops/ragged_array_ops.h index 9b31aa8e..20484623 100644 --- a/third_party/fwkacllib/inc/ops/ragged_array_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_array_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h index 13488a25..020e3da4 100644 --- a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/ragged_math_ops.h b/third_party/fwkacllib/inc/ops/ragged_math_ops.h index 8af4f867..258b0ca1 100644 --- a/third_party/fwkacllib/inc/ops/ragged_math_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_math_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index b46da435..e2b00ce3 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -495,6 +495,60 @@ REG_OP(ShuffleChannel) DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) .ATTR(group, Int, 1) .OP_END_FACTORY_REG(ShuffleChannel) + +/** + * @briefGenerate a tensor of samples from a multinomial + * distribution according to the probabilities of each of + * the possible outcomes. + * + * @par inputs + * one input including: + * @li x:Input tensor with shape [batch_size, class_size], + * where class_size is the number of all possible outcomes. + * Each value along the axis zero represents the unnormalized + * log-probability of each corresponding outcome in a batch. + * + * @par output + * one output including: + * @li y:Output tensor with shape [batch_size, sample_size], + * where sample_size is the number of times to sample. + * Each value along the axis zero represents the outcome of + * the corresponding sample in a batch. + * + */ +REG_OP(MultinomialFuss) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_FLOAT64})) + .OUTPUT(y, TensorType({DT_INT32, DT_INT64})) + .ATTR(dtype, Int, 6) + .ATTR(sample_size, Int, 1) + .ATTR(seed, Float, 0) + .OP_END_FACTORY_REG(MultinomialFuss) + +/** +* @brief During training, randomly zeroes some of the elements of the input tensor +* with probability +* +* @par Inputs: +* @li x: A ND Tensor. Must be one of the following data types: Float, Float16 +* @li seed: A ND Tensor. Must be one of the following data types: Float +* +* @par Attributes: +* @li p: probability of an element to be zeroed +* +* @par Outputs: +* @li y: A tensor with the same shape and type as "x". +* @li mask: A tensor with the same shape and type as "x". +* @li new_seed: A tensor with the same shape and type as "seed". +*/ + +REG_OP(DropoutV2) + .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT })) + .INPUT(seed, TensorType({ DT_FLOAT })) + .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT })) + .OUTPUT(mask, TensorType({ DT_FLOAT })) + .OUTPUT(seed, TensorType({ DT_FLOAT })) + .REQUIRED_ATTR(p, Float) + .OP_END_FACTORY_REG(DropoutV2) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 6f44093e..0b114134 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -635,8 +635,8 @@ REG_OP(ReduceMin) * Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead. */ REG_OP(ReduceMinD) - .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) - .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8,DT_INT32})) .REQUIRED_ATTR(axes, ListInt) .ATTR(keep_dims, Bool, false) .OP_END_FACTORY_REG(ReduceMinD) @@ -821,7 +821,7 @@ Defaults to "0.00001" . \n *batch_ variance: A Tensor of type float32 for the result variance . \n *@attention Constraints: -*For Ascend 310, the result accuracy fails to reach 1 due to the square root instruction. +*For Ascend 310, the result accuracy fails to reach 0.001 due to the square root instruction. */ REG_OP(INInferV2) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -882,7 +882,7 @@ REG_OP(INTrainingReduceV2) *@attention Constraints: *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. * This operator is used in conjunction with INTrainingReduceV2. -*@li For Ascend 310, the result accuracy fails to reach 1 due to the square root instruction. +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. */ REG_OP(INTrainingUpdateV2) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -965,7 +965,7 @@ for the updated variance. *@attention Constraints: *@li This operator is a InstanceNorm fusion operator for updating the moving averages for training. * This operator is used in conjunction with GNTrainingUpdate. -*@li For Ascend 310, the result accuracy fails to reach 1 due to the square root instruction. +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. */ REG_OP(GNTrainingUpdate) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -982,6 +982,41 @@ REG_OP(GNTrainingUpdate) .OUTPUT(batch_variance, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(GNTrainingUpdate) +/** +* @brief Calculates the standard deviation and average value of Tensors. + +* @par Inputs: +* @li x: A Tensor. Must be one of the following types: +* float16, float32. \n + +* @par Attributes: +* Three Attributes, including: +* @li dim: An optional listint, Defaults to "None". \n + +* @li unbiased: An optional bool. Defaults to "True". +* If "True", Use Bessel Correction. +* If "False", Do not use Bessel Correction. \n + +* @li keepdim: An optional bool. Defaults to "False". +* If "True", Keep the original tensor dimension. +* If "False", Do not keep the original tensor dimension. \n + +* @par Outputs: +* Two Outputs, including: +* @li y1: A Tensor. Has the same type as "x". +* @li y2: A Tensor. Has the same type as "x". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator ReduceStd. +*/ +REG_OP(ReduceStd) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y1, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y2, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(dim, ListInt, {}) + .ATTR(unbiased, Bool, true) + .ATTR(keepdim, Bool, false) + .OP_END_FACTORY_REG(ReduceStd) } //namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/resource_variable_ops.h b/third_party/fwkacllib/inc/ops/resource_variable_ops.h index 1b60d42a..74ac83f8 100644 --- a/third_party/fwkacllib/inc/ops/resource_variable_ops.h +++ b/third_party/fwkacllib/inc/ops/resource_variable_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index 84723872..cfd20479 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -255,6 +255,59 @@ REG_OP(DynamicRNN) .OP_END_FACTORY_REG(DynamicRNN) /** +*@brief: DynamicLSTMV2 calculation. +*@par Inputs: +*ten inputs: +*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li cont:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li w_xc_x_static:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. +*@li h0:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li c0:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. +*@li mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n + +*@par Attributes: +*@li num_output:An integer identifying the num projection in the op. Default to 0. +*@li expose_hidden:An bool identifying the expose_hidden in the op. Default to flase. +*@li need_output_last:An bool identifying the time major in the op. Default to true. +*@li forget_bias:An float identifying the forget bias in the op. Default to 0. + +*@par Outputs: +*eight outputs: +*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li last_output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li last_output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*/ +REG_OP(DynamicLSTMV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(cont, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(w_xc_x_static, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(h0, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(c0, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wci, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wcf, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(wco, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(mask, TensorType({DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(last_output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(last_output_c, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(num_output, Int, 0) + .ATTR(expose_hidden, Bool, false) + .ATTR(need_output_last, Bool, false) + .ATTR(forget_bias, Float, 0.0) + .OP_END_FACTORY_REG(DynamicLSTMV2) + +/** *@brief: LSTMInputGrad calculation. *@par Inputs: *ten inputs: \n @@ -475,9 +528,9 @@ REG_OP(BasicRNNCell) .OP_END_FACTORY_REG(BasicRNNCell) /** -*@brief: DynamicGRU calculation. +*@brief DynamicGRU calculation. *@par Inputs: -*seven inputs: \n +*seven inputs: *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. *@li w:Must be one of the following types: float16. The format must be FRACTAL_Z. *@li b:Must be one of the following types: float16, float32. The format must be ND. @@ -497,7 +550,7 @@ REG_OP(BasicRNNCell) *@li is_training:An bool identifying is training in the op. Default to true. *@par Outputs: -*five outputs: \n +*five outputs: *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. @@ -531,9 +584,9 @@ REG_OP(DynamicGRU) .OP_END_FACTORY_REG(DynamicGRU) /** -*@brief: DynamicGRUV2 calculation. +*@brief DynamicGRUV2 calculation. *@par Inputs: -*seven inputs: \n +*seven inputs: *@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. *@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z. *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. @@ -555,7 +608,7 @@ REG_OP(DynamicGRU) *@li is_training:An bool identifying is training in the op. Default to true. *@par Outputs: -*six outputs: \n +*six outputs: *@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. @@ -592,6 +645,68 @@ REG_OP(DynamicGRUV2) .ATTR(is_training, Bool, true) .OP_END_FACTORY_REG(DynamicGRUV2) + +/** +*@brief DynamicGRUV2Hidden calculation. +*@par Inputs: +*five inputs: +*@li x_weight_input:Must be one of the following types: float32. The format must be FRACTAL_NZ. +*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:Must be one of the following types: int32. The format must be ND. +*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Attributes: +*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". +Only UNIDIRECTIONAL is currently supported. +*@li cell_depth:An integer identifying the cell depth in the op. Default to 1. +*@li keep_prob:An float identifying the keep prob in the op. Default to 1. +*@li cell_clip:An float identifying the cell clip in the op. Default to -1. +*@li num_proj:An integer identifying the num projection in the op. Default to 0. +*@li time_major:An bool identifying the time major in the op. Default to true. +*@li activation:An string identifying the type of activation function in the op. Default to "tanh". +Only tanh is currently supported. +*@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. +*@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true. +*@li is_training:An bool identifying is training in the op. Default to true. + +*@par Outputs: +*six outputs: +*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DynamicGRUV2Hidden) + .INPUT(x_weight_input, TensorType({DT_FLOAT32})) + .INPUT(weight_hidden, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(update, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(new, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(direction, String, "UNIDIRECTIONAL") + .ATTR(cell_depth, Int, 1) + .ATTR(keep_prob, Float, 1.0) + .ATTR(cell_clip, Float, -1.0) + .ATTR(num_proj, Int, 0) + .ATTR(time_major, Bool, true) + .ATTR(activation, String, "tanh") + .ATTR(gate_order, String, "zrh") + .ATTR(reset_after, Bool, true) + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(DynamicGRUV2Hidden) + + /** *@brief: DynamicGRUV2Grad calculation. *@par Inputs: @@ -618,7 +733,6 @@ REG_OP(DynamicGRUV2) *@li cell_clip:An float identifying the cell clip in the op. Default to -1. *@li num_proj:An integer identifying the num projection in the op. Default to 0. *@li time_major:An bool identifying the time major in the op. Default to true. -*@li bias_type:An string identifying the type of bias_type function in the op. Default to "double_bias". *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. *@li reset_after:An bool identifying whether to apply reset gate after matrix multiplication. Default to true. @@ -630,6 +744,9 @@ REG_OP(DynamicGRUV2) *@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DynamicGRUV2Grad) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -658,7 +775,6 @@ REG_OP(DynamicGRUV2Grad) .ATTR(cell_clip, Float, -1.0) .ATTR(num_proj, Int, 0) .ATTR(time_major, Bool, true) - .ATTR(bias_type, String, "double_bias") .ATTR(gate_order, String, "zrh") .ATTR(reset_after, Bool, true) .OP_END_FACTORY_REG(DynamicGRUV2Grad) @@ -667,7 +783,7 @@ REG_OP(DynamicGRUV2Grad) *@brief: GRUV2HiddenGrad calculation. *@par Inputs: *nine inputs: \n -*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. @@ -678,6 +794,7 @@ REG_OP(DynamicGRUV2Grad) *@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Attributes: +*@li t_state:An Int identifying the current t state. Default to [0, 4]. *@li gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. *@par Outputs: @@ -685,10 +802,12 @@ REG_OP(DynamicGRUV2Grad) *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ -REG_OP(GRUV2HiddenGrad) - .INPUT(weight_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) +REG_OP(GRUV2HiddenGradCell) + .INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -699,8 +818,42 @@ REG_OP(GRUV2HiddenGrad) .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(t_state, Int, 0) .ATTR(gate_order, String, "zrh") - .OP_END_FACTORY_REG(GRUV2HiddenGrad) + .OP_END_FACTORY_REG(GRUV2HiddenGradCell) + +/** +* @brief Calculates the reversed outputs of the function "embedding". \n + +* @par Inputs: +* Two inputs, including: +* @li grad: A mutable Tensor of word grad. Must be one of the following types: +* float32. +* @li indices: A mutable word index Tensor of the int32 type.\n + +* @par Attributes: +* @li num_weights: An int attr which use to judge how many words in dict. \n + +* @li padding_idx: An int attr judge which word to fill zeros. Defaults to "-1". \n + +* @li scale_grad_by_freq: An optional bool. Defaults to "False". +* If "True", "grad_weight" will be scale by word_frequency. +* If "False", "grad_weight" will not be scale by word_frequency. \n + +* @par Outputs: +* @li grad_weight: A mutable output Tensor of new word grad has the same type as "grads". \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator EmbeddingDenseGrad. +*/ +REG_OP(EmbeddingDenseGrad) + .INPUT(grad, TensorType({ DT_FLOAT32 })) /* "First operand." */ + .INPUT(indices, TensorType({ DT_INT32 })) /* "Second operand." */ + .OUTPUT(y, TensorType({ DT_FLOAT32 })) /* "Result, has same element type as two inputs" */ + .REQUIRED_ATTR(num_weights, Int) + .ATTR(padding_idx, Int, -1) + .ATTR(scale_grad_by_freq, Bool, false) + .OP_END_FACTORY_REG(EmbeddingDenseGrad) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ diff --git a/third_party/fwkacllib/inc/ops/rpn_ops.h b/third_party/fwkacllib/inc/ops/rpn_ops.h index b7649a44..089af326 100644 --- a/third_party/fwkacllib/inc/ops/rpn_ops.h +++ b/third_party/fwkacllib/inc/ops/rpn_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/save_ops.h b/third_party/fwkacllib/inc/ops/save_ops.h index 0ce473b7..5ce6c2e0 100644 --- a/third_party/fwkacllib/inc/ops/save_ops.h +++ b/third_party/fwkacllib/inc/ops/save_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/sdca_ops.h b/third_party/fwkacllib/inc/ops/sdca_ops.h index cbd9839d..34c6a268 100644 --- a/third_party/fwkacllib/inc/ops/sdca_ops.h +++ b/third_party/fwkacllib/inc/ops/sdca_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 2c99e82e..dee9e0f7 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -797,6 +797,34 @@ REG_OP(SliceD) .OP_END_FACTORY_REG(SliceD) /** +*@brief Extracts a slice from a tensor. +* This operation extracts a slice of size "size" from a tensor "x" +* starting at the location specified by "begin" . \n + +*@par Inputs: +*@li x: A Tensor. Must be one of the following types: +* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, +* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32 . \n + +*@par Inputs: +*@li offsets: The starting location for the slice. + +*@par Attributes: +*@li size: The tensor shape . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x". The slice extracted from the tensor. +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. Please use Slice instead. +*/ +REG_OP(SliceDV2) + .INPUT(x, TensorType::BasicType()) + .INPUT(offsets, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .REQUIRED_ATTR(size, ListInt) + .OP_END_FACTORY_REG(SliceDV2) + +/** * @brief Finds values and indices of the "k" largest elements for the last * dimension . \n @@ -1921,6 +1949,160 @@ REG_OP(CumulativeLogsumexpD) .ATTR(exclusive, Bool, false) .ATTR(reverse, Bool, false) .OP_END_FACTORY_REG(CumulativeLogsumexpD) + +/** +* @brief Add updates to var according to axis and indices. + +* @par Inputs: +* Three inputs, including: +* @li var: A Tensor. Must be one of the following types: +* float16, float32, int16, int32, int8, uint8. +* @li indices: A Tensor of the indices, type should be int32. +* @li updates: A Tensor of the same type as "var". \n + +* @par Attributes: +* @li axis: An required int to specify the axis to perform indices add. \n + +* @par Outputs: +* @li var: A Tensor. Same as input "var". + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator index_add_. +*/ +REG_OP(InplaceIndexAdd) + .INPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8, + DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(updates, TensorType({DT_INT16, DT_INT32, DT_INT8, + DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .OUTPUT(var, TensorType({DT_INT16, DT_INT32, DT_INT8, + DT_UINT8, DT_FLOAT32, DT_FLOAT16})) + .REQUIRED_ATTR(axis, Int) + .OP_END_FACTORY_REG(InplaceIndexAdd) + +/** +* @brief Replace the value of X with value according to mask. +* @par Inputs: +* three inputs, including: +* @li x: A Tensor of dtype is float16 or float32 or int32 or int8. +* @li mask: A Tensor of dtype float16 or float32 or int32 or int8. +* @li value: A Tensor or scalar of dtype float16 or float32 or int32 or int8. \n + +* @par Outputs: +* @li y: A tensor. Must be one of the following dtypes: +* float16, float32, int32, int8. +*/ +REG_OP(MaskedFill) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) + .INPUT(mask, TensorType({DT_BOOL})) + .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT32})) + .OP_END_FACTORY_REG(MaskedFill) + +/** +* @brief Choose the value of X with value according to mask. + +* @par Inputs: +* two inputs, including: +* @li x: A Tensor of dtype is float16 or float32. +* @li mask: A Tensor of dtype is bool. \n + +* @par Outputs: +* @li y: A tensor with the same type as x. \n + +* @par Third-party framework compatibility +* Compatible with the Numpy operator select. +* Replaces the pytorch operator masked_select in some scenarios.\n +*/ +REG_OP(MaskedSelectV2) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(mask, TensorType({DT_BOOL})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(MaskedSelectV2) + +/** +* @brief Slice a tensor at its last dim, e.x. a[..., begin:end:stride]. \n + +* @par Inputs: +* One inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, int16, int32. + +* @par Attributes: +* @li start: An attribute of type Int, start index of last dim. \n +* @li end: An attribute of type Int, end index of last dim. \n +* @li stride: An attribute of type Int, stride of slice. \n + +* @par Outputs: +* @li y: A Tensor. Has the same type as "x". \n + +* @par Third-party framework compatibility +* No compatibility +*/ +REG_OP(SliceLastDim) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, DT_INT32, DT_INT64})) + .REQUIRED_ATTR(start, Int) + .REQUIRED_ATTR(end, Int) + .ATTR(stride, Int, 1) + .OP_END_FACTORY_REG(SliceLastDim) + +/** +* @brief Extracts a strided slice of a tensor. Roughly speaking, this op \n +* extracts a slice of size (end-begin)/stride from the given input tensor. \n +* Starting at the location specified by begin the slice continues by \n +* adding stride to the index until all dimensions are not less than end. \n +* +* @par Inputs: +* Four inputs, including: +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n +* complex128, float16, uint32, uint64, complex64, complex128. \n +* @li begin: A Tensor of type int32 or int64, for the index of the first value to select. +* +* @li end: A Tensor of type int32 or int64, for the index of the last value to select. +* +* @li axes: A Tensor of type int32 or int64, indicate axis to be select. +* +* @li strides: A Tensor of type int32 or int64, for the increment. +* +* @par Attributes: +* @li begin_mask: A Tensor of type int32. \n +* A bitmask where a bit "i" being "1" means to ignore the begin \n +* value and instead use the largest interval possible. +* @li end_mask: A Tensor of type int32. \n +* Analogous to "begin_mask". +* @li ellipsis_mask: A Tensor of type int32. \n +* A bitmask where bit "i" being "1" means the "i"th position \n +* is actually an ellipsis. +* @li new_axis_mask: A Tensor of type int32. \n +* A bitmask where bit "i" being "1" means the "i"th \n +* specification creates a new shape 1 dimension. +* @li shrink_axis_mask: A Tensor of type int32. \n +* A bitmask where bit "i" implies that the "i"th \n +* specification should shrink the dimensionality. +* +* @par Outputs: +* y: A Tensor. Has the same type as "x". +* +* @attention Constraints: +* +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator StridedSliceV2. +*/ +REG_OP(StridedSliceV2) + .INPUT(x, TensorType::BasicType()) + .INPUT(begin, TensorType::IndexNumberType()) + .INPUT(end, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(axes, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(strides, TensorType::IndexNumberType()) + .ATTR(begin_mask, Int, 0) + .ATTR(end_mask, Int, 0) + .ATTR(ellipsis_mask, Int, 0) + .ATTR(new_axis_mask, Int, 0) + .ATTR(shrink_axis_mask, Int, 0) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(StridedSliceV2) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/set_ops.h b/third_party/fwkacllib/inc/ops/set_ops.h index 1d02fa15..04e04f1b 100644 --- a/third_party/fwkacllib/inc/ops/set_ops.h +++ b/third_party/fwkacllib/inc/ops/set_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h index d7512790..09d8ced9 100644 --- a/third_party/fwkacllib/inc/ops/sparse_ops.h +++ b/third_party/fwkacllib/inc/ops/sparse_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/ops/spectral_ops.h index 64fa7814..be3d7d00 100644 --- a/third_party/fwkacllib/inc/ops/spectral_ops.h +++ b/third_party/fwkacllib/inc/ops/spectral_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h index efe4715d..f1a93fa6 100644 --- a/third_party/fwkacllib/inc/ops/split_combination_ops.h +++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/state_ops.h b/third_party/fwkacllib/inc/ops/state_ops.h index db1f5353..3c8e32b6 100644 --- a/third_party/fwkacllib/inc/ops/state_ops.h +++ b/third_party/fwkacllib/inc/ops/state_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/stateful_random_ops.h b/third_party/fwkacllib/inc/ops/stateful_random_ops.h index 366112d6..c2f65c6a 100644 --- a/third_party/fwkacllib/inc/ops/stateful_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateful_random_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/stateless_random_ops.h b/third_party/fwkacllib/inc/ops/stateless_random_ops.h index dad3c379..ff9daaa3 100644 --- a/third_party/fwkacllib/inc/ops/stateless_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateless_random_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h index 4a88bc79..ec84cc83 100644 --- a/third_party/fwkacllib/inc/ops/string_ops.h +++ b/third_party/fwkacllib/inc/ops/string_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/swap_co_ops.h b/third_party/fwkacllib/inc/ops/swap_co_ops.h index a1bf4f8b..6e8eaac3 100644 --- a/third_party/fwkacllib/inc/ops/swap_co_ops.h +++ b/third_party/fwkacllib/inc/ops/swap_co_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/target_crop_and_resize.h b/third_party/fwkacllib/inc/ops/target_crop_and_resize.h index 9c61f2c9..9bef1d7b 100644 --- a/third_party/fwkacllib/inc/ops/target_crop_and_resize.h +++ b/third_party/fwkacllib/inc/ops/target_crop_and_resize.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 64e18fc7..d24c0512 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -141,7 +141,7 @@ support "NHWC/NCHW" to "NC1HWC0" and "NC1HWC0" to "NHWC/NCHW" *@par Attributes: *@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Zn" etc. *@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc. -*@li group: A required int32, default value is 1. \n +*@li group: A optional int32, default value is 1. \n *@par Outputs: *dst: A Tensor dtype of all types. @@ -151,7 +151,7 @@ REG_OP(TransData) .OUTPUT(dst, TensorType::BasicType()) .REQUIRED_ATTR(src_format, String) .REQUIRED_ATTR(dst_format, String) - .ATTR(group, Int, 1) + .ATTR(groups, Int, 1) .OP_END_FACTORY_REG(TransData) /** @@ -357,7 +357,7 @@ REG_OP(DepthToSpace) *@brief Permutes data into spatial data blocks and then prunes them . \n *@par Inputs: -*@li x: A 4D Tensor with format NHWC. +*@li x: A 4D Tensor with format. Must set the format, supported format list ["NCHW, NHWC"] *@li crops: A 1D list or tuple of int32 or int64 . \n *Must be one of the following types: float16, float32 @@ -434,9 +434,10 @@ REG_OP(BatchToSpaceD) *@par Inputs: * Two inputs, including: -*@li x: An NHWC Tensor. Must be one of the following types: +*@li x: An 4D Tensor. Must be one of the following types: * float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, * int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. +* Must set the format, supported format list ["NCHW, NHWC"] *@li paddings: A 2D tensor of type int, specifying the input . \n *@par Attributes: diff --git a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h index e19cbd7c..8ef69d8b 100644 --- a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h +++ b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2019 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index aa8263f9..b9b2cbe5 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -1,18 +1,18 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * + * Copyright 2020 Huawei Technologies Co., Ltd + * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * http://www.apache.org/licenses/LICENSE-2.0 - * + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ +*/ #ifndef __CCE_RUNTIME_BASE_H__ #define __CCE_RUNTIME_BASE_H__ diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index c35a1278..12a407d7 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -1,18 +1,18 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * + * Copyright 2020 Huawei Technologies Co., Ltd + * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * http://www.apache.org/licenses/LICENSE-2.0 - * + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ +*/ #ifndef __CCE_RUNTIME_CONFIG_H__ #define __CCE_RUNTIME_CONFIG_H__ diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index 3346ff75..4be49a8c 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -1,18 +1,18 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * + * Copyright 2020 Huawei Technologies Co., Ltd + * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * http://www.apache.org/licenses/LICENSE-2.0 - * + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ +*/ #ifndef __CCE_RUNTIME_CONTEXT_H__ #define __CCE_RUNTIME_CONTEXT_H__ diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index c70a2372..d1a91a9b 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -1,18 +1,18 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * + * Copyright 2020 Huawei Technologies Co., Ltd + * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * http://www.apache.org/licenses/LICENSE-2.0 - * + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ +*/ #ifndef __CCE_RUNTIME_DEVICE_H__ #define __CCE_RUNTIME_DEVICE_H__ diff --git a/third_party/fwkacllib/inc/runtime/dvfsprofile.h b/third_party/fwkacllib/inc/runtime/dvfsprofile.h index e27cd832..6e451695 100644 --- a/third_party/fwkacllib/inc/runtime/dvfsprofile.h +++ b/third_party/fwkacllib/inc/runtime/dvfsprofile.h @@ -1,18 +1,18 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * + * Copyright 2020 Huawei Technologies Co., Ltd + * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * http://www.apache.org/licenses/LICENSE-2.0 - * + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ +*/ #ifndef __CCE_RUNTIME_DVFSPROFILE_H__ #define __CCE_RUNTIME_DVFSPROFILE_H__ diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index f9d2eae2..41e611ea 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -1,18 +1,18 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * + * Copyright 2020 Huawei Technologies Co., Ltd + * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * http://www.apache.org/licenses/LICENSE-2.0 - * + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ +*/ #ifndef __CCE_RUNTIME_EVENT_H__ #define __CCE_RUNTIME_EVENT_H__ diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index 98862ad4..5f519442 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -1,18 +1,18 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * + * Copyright 2020 Huawei Technologies Co., Ltd + * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * http://www.apache.org/licenses/LICENSE-2.0 - * + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ +*/ #ifndef __CCE_RUNTIME_KERNEL_H__ #define __CCE_RUNTIME_KERNEL_H__ diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index f175cd45..e65d8604 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -1,18 +1,18 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * + * Copyright 2020 Huawei Technologies Co., Ltd + * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * http://www.apache.org/licenses/LICENSE-2.0 - * + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ +*/ #ifndef __CCE_RUNTIME_MEM_H__ #define __CCE_RUNTIME_MEM_H__ diff --git a/third_party/fwkacllib/inc/runtime/rt.h b/third_party/fwkacllib/inc/runtime/rt.h index 0d39389b..83cafa3c 100644 --- a/third_party/fwkacllib/inc/runtime/rt.h +++ b/third_party/fwkacllib/inc/runtime/rt.h @@ -1,18 +1,18 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * + * Copyright 2020 Huawei Technologies Co., Ltd + * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * http://www.apache.org/licenses/LICENSE-2.0 - * + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ +*/ #ifndef __CCE_RUNTIME_RT_H__ #define __CCE_RUNTIME_RT_H__ diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index c96349a0..b72b142d 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -1,18 +1,18 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * + * Copyright 2020 Huawei Technologies Co., Ltd + * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * http://www.apache.org/licenses/LICENSE-2.0 - * + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ +*/ #ifndef __CCE_RUNTIME_MODEL_H__ #define __CCE_RUNTIME_MODEL_H__ diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index 631c8083..6b9f80ae 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -1,18 +1,18 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * + * Copyright 2020 Huawei Technologies Co., Ltd + * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * http://www.apache.org/licenses/LICENSE-2.0 - * + * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ +*/ #ifndef __CCE_RUNTIME_STREAM_H__ #define __CCE_RUNTIME_STREAM_H__ @@ -36,6 +36,7 @@ extern "C" { #define RT_STREAM_FORBIDDEN_DEFAULT (0x10) #define RT_STREAM_HEAD (0x20) #define RT_STREAM_PRIMARY_DEFAULT (0x40) +#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80) /** * @ingroup stream_type diff --git a/third_party/fwkacllib/inc/tdt/index_transform.h b/third_party/fwkacllib/inc/tdt/index_transform.h index a62e0185..a5af2c83 100644 --- a/third_party/fwkacllib/inc/tdt/index_transform.h +++ b/third_party/fwkacllib/inc/tdt/index_transform.h @@ -1,18 +1,10 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +* @file index_transform.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2018-2019. All Rights Reserved. +* +* This program is used to get logical device id by phy device id. +*/ #ifndef INC_TDT_INDEX_TRANSFORM_H #define INC_TDT_INDEX_TRANSFORM_H diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h index efb37cfb..0827c062 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h +++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h @@ -73,6 +73,16 @@ #define PROF_MODEL_LOAD_MASK 0x8000000000000000 +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +#if (OS_TYPE != LINUX) +#define MSVP_PROF_API __declspec(dllexport) +#else +#define MSVP_PROF_API __attribute__((visibility("default"))) +#endif + #include namespace Msprofiler { @@ -85,7 +95,7 @@ namespace Api { * @param index [IN] index of part(op) * @return op execution time (us) */ -uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); +MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); } } diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h index 1299ae59..3fad74bc 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_callback.h +++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h @@ -1,5 +1,5 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * Copyright 2020-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,6 +12,9 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. + * + * @file prof_callback.h + * @brief declaraion of profiling callbacks */ #ifndef MSPROFILER_PROF_CALLBACK_H_ diff --git a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h index 87fdcbeb..12b6aa1e 100644 --- a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h +++ b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h @@ -1,19 +1,13 @@ /** - * Copyright 2019-2020 Huawei Technologies Co., Ltd + * @file tune_api.h * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n + * 描述:mstune调优接口头文件 */ - /** @defgroup mstune mstune调优接口 */ #ifndef TUNE_API_H #define TUNE_API_H