From bf316a3eeb362dd38287d53f4807f9d04134e2dc Mon Sep 17 00:00:00 2001 From: wuweikang Date: Mon, 28 Sep 2020 21:59:48 +0800 Subject: [PATCH 1/7] sync-from-trunk-to-blue-zone-0928 --- CMakeLists.txt | 1 + inc/external/ge/ge_prof.h | 61 +++++++-- inc/framework/common/ge_types.h | 2 + inc/framework/executor/ge_executor.h | 2 + inc/graph/debug/ge_attr_define.h | 4 + inc/graph/runtime_inference_context.h | 3 + src/common/graph/ge_attr_define.cc | 4 + src/common/graph/ge_tensor.cc | 2 +- src/common/graph/graph.mk | 2 +- src/common/graph/runtime_inference_context.cc | 33 +++++ src/common/graph/utils/node_utils.cc | 73 ++++++++-- src/ge/CMakeLists.txt | 3 + src/ge/client/ge_prof.cc | 67 ++++------ src/ge/common/dump/dump_server.cc | 21 +++ src/ge/common/profiling/profiling_manager.cc | 35 +++-- src/ge/common/util.cc | 6 +- src/ge/executor/ge_executor.cc | 16 +++ src/ge/ge_inference.mk | 3 +- src/ge/ge_runner.mk | 6 +- src/ge/graph/build/memory/block_mem_assigner.cc | 17 ++- src/ge/graph/build/memory/block_mem_assigner.h | 3 +- src/ge/graph/build/memory/graph_mem_assigner.cc | 50 ++++++- src/ge/graph/execute/graph_execute.cc | 10 ++ src/ge/graph/execute/graph_execute.h | 2 + .../graph/load/new_model_manager/davinci_model.cc | 58 +++++++- .../graph/load/new_model_manager/davinci_model.h | 10 +- .../graph/load/new_model_manager/model_manager.cc | 8 ++ .../graph/load/new_model_manager/model_manager.h | 2 + src/ge/graph/manager/graph_manager.cc | 7 + src/ge/graph/optimize/mem_rw_conflict_optimize.cc | 2 +- src/ge/graph/passes/subgraph_pass.cc | 3 + src/ge/graph/preprocess/insert_op/ge_aipp_op.cc | 56 ++++++-- src/ge/graph/preprocess/insert_op/ge_aipp_op.h | 2 + .../preprocess/insert_op/util_insert_aipp_op.cc | 25 ++-- src/ge/graph/preprocess/multi_batch_copy_graph.cc | 61 +++++++-- src/ge/graph/preprocess/multi_batch_copy_graph.h | 2 + .../node_executor/aicpu/aicpu_node_executor.cc | 11 +- src/ge/session/inner_session.cc | 29 ++++ src/ge/session/inner_session.h | 4 + 
src/ge/single_op/single_op.cc | 2 +- third_party/fwkacllib/inc/ops/aipp.h | 2 + .../fwkacllib/inc/ops/elewise_calculation_ops.h | 55 +++++++- third_party/fwkacllib/inc/ops/internal_ops.h | 3 + third_party/fwkacllib/inc/ops/nn_calculation_ops.h | 148 ++++++++++++--------- third_party/fwkacllib/inc/ops/nn_detect_ops.h | 18 +++ third_party/fwkacllib/inc/ops/nn_norm_ops.h | 17 +++ third_party/fwkacllib/inc/ops/nn_training_ops.h | 8 ++ third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h | 18 +++ third_party/fwkacllib/inc/ops/pad_ops.h | 70 ++++++++++ third_party/fwkacllib/inc/ops/random_ops.h | 2 +- third_party/fwkacllib/inc/ops/reduce_ops.h | 6 +- .../fwkacllib/inc/ops/resource_variable_ops.h | 54 ++++++++ third_party/fwkacllib/inc/ops/rnn.h | 69 ++++++---- third_party/fwkacllib/inc/ops/selection_ops.h | 6 + third_party/fwkacllib/inc/ops/transformation_ops.h | 3 + .../fwkacllib/inc/ops/warp_perspective_ops.h | 3 + third_party/fwkacllib/inc/runtime/mem.h | 9 +- third_party/fwkacllib/inc/tdt/status.h | 4 + .../fwkacllib/inc/toolchain/adx_datadump_server.h | 36 +++++ 59 files changed, 981 insertions(+), 258 deletions(-) create mode 100644 src/ge/common/dump/dump_server.cc create mode 100644 third_party/fwkacllib/inc/toolchain/adx_datadump_server.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 86f473e8..266ea024 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,6 +91,7 @@ else() find_library(register libregister.so ${ASCEND_RUNTIME_DIR}) find_library(resource libresource.so ${ASCEND_RUNTIME_DIR}) find_library(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) + find_library(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) endif() # add compile flags diff --git a/inc/external/ge/ge_prof.h b/inc/external/ge/ge_prof.h index dbd87966..658cea76 100644 --- a/inc/external/ge/ge_prof.h +++ b/inc/external/ge/ge_prof.h @@ -25,22 +25,11 @@ namespace ge { enum ProfDataTypeConfig { - kProfAcl = 0x0001, kProfTaskTime = 0x0002, kProfAiCoreMetrics = 0x0004, 
kProfAicpuTrace = 0x0008, - kProfModelExecute = 0x0010, - kProfRuntimeApi = 0x0020, - kProfRuntimeTrace = 0x0040, - kProfScheduleTimeline = 0x0080, - kProfScheduleTrace = 0x0100, - kProfAiVectorCoreMetrics = 0x0200, - kProfSubtaskTime = 0x0400, kProfTrainingTrace = 0x0800, - kProfHcclTrace = 0x1000, - kProfDataProcess = 0x2000, - kProfTaskTrace = 0x3842, - kProfModelLoad = 0x8000000000000000 + kProfHcclTrace = 0x1000 }; enum ProfilingAicoreMetrics { @@ -49,20 +38,64 @@ enum ProfilingAicoreMetrics { kAicoreSynchronization = 2, kAicoreMemory = 3, kAicoreInternalMemory = 4, - kAicoreStall = 5, - kAicoreMetricsAll = 255 // only for op_trace + kAicoreStall = 5 }; typedef struct ProfAicoreEvents ProfAicoreEvents; typedef struct aclgrphProfConfig aclgrphProfConfig; +/// +/// @ingroup AscendCL +/// @brief Initialize the profiling and set profiling configuration path +/// @param [in] profiler_path: configuration path of profiling +/// @param [in] length: length of configuration path +/// @return Status result of function +/// Status aclgrphProfInit(const char *profiler_path, uint32_t length); + +/// +/// @ingroup AscendCL +/// @brief Finalize profiling +/// @return Status result of function +/// Status aclgrphProfFinalize(); + +/// +/// @ingroup AscendCL +/// @brief Create data of type aclgrphProfConfig +/// @param [in] deviceid_list: device id list +/// @param [in] device_nums: device numbers +/// @param [in] aicore_metrics: type of aicore metrics +/// @param [in] aicore_events: pointer to aicore events be reserved, only support NULL now +/// @param [in] data_type_config: modules need profiling +/// @return Status result of function +/// aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t device_nums, ProfilingAicoreMetrics aicore_metrics, ProfAicoreEvents *aicore_events, uint64_t data_type_config); + +/// +/// @ingroup AscendCL +/// @brief Destroy data of type aclgrphProfConfig +/// @param [in] profiler_config: config of profiling +/// @return 
Status result of function +/// Status aclgrphProfDestroyConfig(aclgrphProfConfig *profiler_config); + +/// +/// @ingroup AscendCL +/// @brief Start profiling of modules which is configured by profiler config +/// @param [in] profiler_config: config of profiling +/// @return Status result of function +/// Status aclgrphProfStart(aclgrphProfConfig *profiler_config); + +/// +/// @ingroup AscendCL +/// @brief Stop profiling of modules which is configured by profiler config +/// @param [in] profiler_config: config of profiling +/// @return Status result of function +/// Status aclgrphProfStop(aclgrphProfConfig *profiler_config); } // namespace ge diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 9a4fd1f9..6033521c 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -48,6 +48,8 @@ enum OpEngineType { ENGINE_AIVECTOR = 4 // not support }; +enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYNAMIC_AIPP, DYNAMIC_AIPP_NODE }; + const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 00846112..6e82bb96 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -163,6 +163,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); ge::Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); + ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); + ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector &input_desc, std::vector &output_desc); diff --git a/inc/graph/debug/ge_attr_define.h b/inc/graph/debug/ge_attr_define.h index a32907bb..7538ba6a 100644 --- a/inc/graph/debug/ge_attr_define.h +++ 
b/inc/graph/debug/ge_attr_define.h @@ -141,8 +141,12 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_OUTPUTS; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_DIMS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_AIPP_INPUT_DIMS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DATA_RELATED_AIPP_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DATA_AIPP_DATA_NAME_MAP; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_GRAPH_HAS_BEEN_ADDED; + GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME; diff --git a/inc/graph/runtime_inference_context.h b/inc/graph/runtime_inference_context.h index 6c6c82e7..f0b38546 100644 --- a/inc/graph/runtime_inference_context.h +++ b/inc/graph/runtime_inference_context.h @@ -23,6 +23,7 @@ #include #include "external/graph/ge_error_codes.h" #include "external/graph/tensor.h" +#include "ge_attr_value.h" namespace ge { class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY RuntimeInferenceContext { @@ -32,10 +33,12 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY RuntimeInferenceContext { static void DestroyContext(const std::string &context_id); graphStatus SetTensor(int64_t node_id, int output_id, Tensor &&tensor); + graphStatus GetTensor(int64_t node_id, int output_id, GeTensorPtr &tensor); graphStatus GetTensor(int64_t node_id, int output_id, Tensor &tensor); private: std::map> tensors_; + std::map> ge_tensors_; std::mutex mu_; static std::map> contexts_; diff --git a/src/common/graph/ge_attr_define.cc b/src/common/graph/ge_attr_define.cc index 4834c73b..cd504812 100644 --- 
a/src/common/graph/ge_attr_define.cc +++ b/src/common/graph/ge_attr_define.cc @@ -122,8 +122,12 @@ const std::string ATTR_NAME_AIPP_INPUTS = "_aipp_inputs"; const std::string ATTR_NAME_AIPP_OUTPUTS = "_aipp_outputs"; const std::string ATTR_NAME_INPUT_DIMS = "input_dims"; +const std::string ATTR_DYNAMIC_AIPP_INPUT_DIMS = "_dynamic_aipp_input_dims"; +const std::string ATTR_DATA_RELATED_AIPP_MODE = "_data_related_aipp_mode"; +const std::string ATTR_DATA_AIPP_DATA_NAME_MAP = "_data_aipp_data_name_map"; const std::string ATTR_NAME_GRAPH_HAS_BEEN_ADDED = "_graph_has_been_added"; + const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id"; const std::string ATTR_NAME_PARENT_GRAPH_NAME = "_parent_graph_name"; diff --git a/src/common/graph/ge_tensor.cc b/src/common/graph/ge_tensor.cc index 196b8569..65881435 100644 --- a/src/common/graph/ge_tensor.cc +++ b/src/common/graph/ge_tensor.cc @@ -431,7 +431,7 @@ graphStatus GeTensorDesc::GetShapeRange(std::vector> return GRAPH_FAILED; } std::pair pair({ele[0], ele[1]}); - range.push_back(pair); + range.emplace_back(pair); } return GRAPH_SUCCESS; diff --git a/src/common/graph/graph.mk b/src/common/graph/graph.mk index 9e9ffa3a..4ea84919 100644 --- a/src/common/graph/graph.mk +++ b/src/common/graph/graph.mk @@ -33,7 +33,6 @@ COMMON_LOCAL_SRC_FILES := \ ./utils/tuning_utils.cc \ ./utils/graph_utils.cc \ ./utils/ge_ir_utils.cc \ - ./utils/node_utils.cc \ ./utils/op_desc_utils.cc \ ./utils/type_utils.cc \ ./utils/tensor_utils.cc \ @@ -44,6 +43,7 @@ COMMON_LOCAL_SRC_FILES := \ option/ge_context.cc \ option/ge_local_context.cc \ ./runtime_inference_context.cc \ + ./utils/node_utils.cc \ COMMON_LOCAL_C_INCLUDES := \ proto/om.proto \ diff --git a/src/common/graph/runtime_inference_context.cc b/src/common/graph/runtime_inference_context.cc index 95068481..361d893c 100644 --- a/src/common/graph/runtime_inference_context.cc +++ b/src/common/graph/runtime_inference_context.cc @@ -15,6 +15,7 @@ */ #include 
"graph/runtime_inference_context.h" +#include "graph/utils/tensor_adapter.h" #include #include "framework/common/debug/ge_log.h" @@ -67,6 +68,14 @@ graphStatus RuntimeInferenceContext::SetTensor(int64_t node_id, int output_id, T GELOGD("Set tensor for node_id = %ld, output_id = %d", node_id, output_id); output_tensors[output_id] = std::move(tensor); + + auto &output_ge_tensors = ge_tensors_[node_id]; + if (static_cast(output_id) >= output_ge_tensors.size()) { + output_ge_tensors.resize(output_id + 1); + } + + GELOGD("Set ge tensor for node_id = %ld, output_id = %d", node_id, output_id); + output_ge_tensors[output_id] = TensorAdapter::AsGeTensorPtr(tensor); return GRAPH_SUCCESS; } @@ -93,4 +102,28 @@ graphStatus RuntimeInferenceContext::GetTensor(int64_t node_id, int output_id, T tensor = output_tensors[output_id]; return GRAPH_SUCCESS; } + +graphStatus RuntimeInferenceContext::GetTensor(int64_t node_id, int output_id, GeTensorPtr &tensor) { + if (output_id < 0) { + GELOGE(GRAPH_PARAM_INVALID, "Invalid output index: %d", output_id); + return GRAPH_PARAM_INVALID; + } + + std::lock_guard lk(mu_); + auto iter = ge_tensors_.find(node_id); + if (iter == ge_tensors_.end()) { + GELOGE(INTERNAL_ERROR, "Node not register. Id = %ld", node_id); + return INTERNAL_ERROR; + } + + auto &output_tensors = iter->second; + if (static_cast(output_id) >= output_tensors.size()) { + GELOGE(GRAPH_FAILED, "Node output is not registered. node_id = %ld, output index = %d", node_id, output_id); + return GRAPH_FAILED; + } + + GELOGD("Get ge tensor for node_id = %ld, output_id = %d", node_id, output_id); + tensor = output_tensors[output_id]; + return GRAPH_SUCCESS; +} } // namespace ge \ No newline at end of file diff --git a/src/common/graph/utils/node_utils.cc b/src/common/graph/utils/node_utils.cc index 72981d10..684e37ac 100644 --- a/src/common/graph/utils/node_utils.cc +++ b/src/common/graph/utils/node_utils.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "utils/node_utils.h" -#include "utils/op_desc_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/op_desc_utils.h" #include "graph/utils/graph_utils.h" #include "debug/ge_op_types.h" #include "debug/ge_util.h" @@ -23,8 +23,13 @@ #include "graph/anchor.h" #include "graph/debug/ge_attr_define.h" #include "graph/types.h" -#include "utils/tensor_utils.h" -#include "utils/type_utils.h" +#include "external/graph/operator.h" +#include "graph/ge_context.h" +#include "graph/runtime_inference_context.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/tensor_adapter.h" +#include "graph/utils/type_utils.h" namespace ge { std::map> NodeUtils::map_send_info_{}; @@ -575,6 +580,58 @@ graphStatus NodeUtils::GetNodeUnknownShapeStatus(const Node &node, bool &is_unkn return GRAPH_SUCCESS; } +graphStatus NodeUtils::GetInputConstData(const ConstNodePtr &node_ptr, const string &dst_name, GeTensorPtr &ge_tensor) { + GE_CHECK_NOTNULL(node_ptr); + return NodeUtils::GetInputConstData(*node_ptr, dst_name, ge_tensor); +} + +graphStatus NodeUtils::GetInputConstData(const Node &node, const string &dst_name, GeTensorPtr &ge_tensor) { + // For inner compute graph + auto op_desc = node.GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + auto index = op_desc->GetInputIndexByName(dst_name); + auto in_data_anchor = node.GetInDataAnchor(index); + GE_CHECK_NOTNULL(in_data_anchor); + auto out_data_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(out_data_anchor); + auto peer_node = out_data_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(peer_node); + auto peer_op_desc = peer_node->GetOpDesc(); + GE_CHECK_NOTNULL(peer_op_desc); + auto peer_op_type = peer_op_desc->GetType(); + if (peer_op_type == CONSTANTOP || peer_op_type == CONSTANT) { + if (!AttrUtils::MutableTensor(peer_node->GetOpDesc(), ATTR_NAME_WEIGHTS, ge_tensor)) { + GELOGW("get attr name %s failed.", ATTR_NAME_WEIGHTS.c_str()); + return GRAPH_FAILED; + } + 
return GRAPH_SUCCESS; + } else if (peer_op_type == DATA) { + auto parent_node = NodeUtils::GetParentInput(peer_node); + while ((parent_node != nullptr) && (parent_node->GetType() == DATA)) { + parent_node = NodeUtils::GetParentInput(parent_node); + } + if ((parent_node != nullptr) && ((parent_node->GetType() == CONSTANT) || (parent_node->GetType() == CONSTANTOP))) { + if (!AttrUtils::MutableTensor(parent_node->GetOpDesc(), ATTR_NAME_WEIGHTS, ge_tensor)) { + GELOGW("get attr name %s failed.", ATTR_NAME_WEIGHTS.c_str()); + return GRAPH_FAILED; + } + return GRAPH_SUCCESS; + } + } + // Try get from runtime inference context + auto session_id = std::to_string(GetContext().SessionId()); + RuntimeInferenceContext *runtime_infer_ctx = nullptr; + if (RuntimeInferenceContext::GetContext(session_id, &runtime_infer_ctx) == GRAPH_SUCCESS) { + GELOGD("To get constant from runtime inference context. session_id = %s", session_id.c_str()); + auto ret = runtime_infer_ctx->GetTensor(peer_node->GetOpDesc()->GetId(), out_data_anchor->GetIdx(), ge_tensor); + if (ret == GRAPH_SUCCESS) { + return GRAPH_SUCCESS; + } + } + GELOGW("node[%s]'s input[%s]'s peer node is not const", node.GetName().c_str(), dst_name.c_str()); + return GRAPH_FAILED; +} + std::string NodeUtils::GetNodeType(const Node &node) { if (node.GetType() != FRAMEWORKOP) { return node.GetType(); @@ -587,14 +644,6 @@ std::string NodeUtils::GetNodeType(const Node &node) { std::string NodeUtils::GetNodeType(const NodePtr &node) { return node == nullptr ? 
"" : GetNodeType(*node); } -graphStatus NodeUtils::GetInputConstData(const ConstNodePtr &node_ptr, const string &dst_name, GeTensorPtr &ge_tensor) { - return GRAPH_SUCCESS; -} - -graphStatus NodeUtils::GetInputConstData(const Node &node, const string &dst_name, GeTensorPtr &ge_tensor) { - return GRAPH_SUCCESS; -} - ComputeGraphPtr NodeUtils::GetSubgraph(const Node &node, uint32_t index) { auto op_desc = node.GetOpDesc(); if (op_desc == nullptr) { diff --git a/src/ge/CMakeLists.txt b/src/ge/CMakeLists.txt index 8c20b336..db00d8a1 100755 --- a/src/ge/CMakeLists.txt +++ b/src/ge/CMakeLists.txt @@ -51,6 +51,7 @@ include_directories(${GE_SOURCE_DIR}/inc/graph) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) +include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/toolchain) include_directories(${CMAKE_BINARY_DIR}) include_directories(${CMAKE_BINARY_DIR}/proto/ge) @@ -227,6 +228,7 @@ target_link_libraries(ge_runner ${runtime} ${resouce} ${ascend_hal} + ${adump_server} rt dl) @@ -237,6 +239,7 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "common/dump/dump_properties.cc" "common/dump/dump_manager.cc" "common/dump/dump_op.cc" + "common/dump/dump_server.cc" "common/formats/format_transfers/*.cc" "common/formats/formats.cc" "common/formats/utils/formats_trans_utils.cc" diff --git a/src/ge/client/ge_prof.cc b/src/ge/client/ge_prof.cc index d4407852..ad9cc9eb 100644 --- a/src/ge/client/ge_prof.cc +++ b/src/ge/client/ge_prof.cc @@ -29,13 +29,14 @@ using std::vector; namespace { const uint32_t kMaxDeviceNum = 64; -const std::string PROFILING_INIT = "prof_init"; -const std::string PROFILING_FINALIZE = "prof_finalize"; -const std::string PROFILING_START = "prof_start"; -const std::string PROFILING_STOP = "prof_stop"; -const std::string DEVICES_NUMS = "devNums"; -const std::string DEVICE_ID_LIST = "devIdList"; -const 
std::string AICORE_METRICS = "aicoreMetrics"; +const uint32_t kDeviceListIndex = 3; +const std::string kProfilingInit = "prof_init"; +const std::string kProfilingFinalize = "prof_finalize"; +const std::string kProfilingStart = "prof_start"; +const std::string kProfilingStop = "prof_stop"; +const std::string kDeviceNums = "devNums"; +const std::string kDeviceIdList = "devIdList"; +const std::string kAicoreMetrics = "aicoreMetrics"; const std::map kProfAicoreMetricsToString = { {ge::kAicoreArithmaticThroughput, "AICORE_ARITHMATIC_THROUGHPUT"}, @@ -43,25 +44,7 @@ const std::map kProfAicoreMetricsToStri {ge::kAicoreSynchronization, "AICORE_SYNCHRONIZATION"}, {ge::kAicoreMemory, "AICORE_MEMORY"}, {ge::kAicoreInternalMemory, "AICORE_INTERNAL_MEMORY"}, - {ge::kAicoreStall, "AICORE_STALL"}, - {ge::kAicoreMetricsAll, "AICORE_METRICS_ALL"}}; - -const std::map kDataTypeConfigMapping = {{ge::kProfAcl, PROF_ACL_API}, - {ge::kProfTaskTime, PROF_TASK_TIME}, - {ge::kProfAiCoreMetrics, PROF_AICORE_METRICS}, - {ge::kProfAicpuTrace, PROF_AICPU_TRACE}, - {ge::kProfModelExecute, PROF_MODEL_EXECUTE}, - {ge::kProfRuntimeApi, PROF_RUNTIME_API}, - {ge::kProfRuntimeTrace, PROF_RUNTIME_TRACE}, - {ge::kProfScheduleTimeline, PROF_SCHEDULE_TIMELINE}, - {ge::kProfScheduleTrace, PROF_SCHEDULE_TRACE}, - {ge::kProfAiVectorCoreMetrics, PROF_AIVECTORCORE_METRICS}, - {ge::kProfSubtaskTime, PROF_SUBTASK_TIME}, - {ge::kProfTrainingTrace, PROF_TRAINING_TRACE}, - {ge::kProfHcclTrace, PROF_HCCL_TRACE}, - {ge::kProfDataProcess, PROF_DATA_PROCESS}, - {ge::kProfTaskTrace, PROF_TASK_TRACE}, - {ge::kProfModelLoad, PROF_MODEL_LOAD}}; + {ge::kAicoreStall, "AICORE_STALL"}}; } // namespace static bool g_graph_prof_init_ = false; @@ -107,11 +90,11 @@ Status aclgrphProfInit(const char *profiler_path, uint32_t length) { GraphLoader graph_loader; Command command; command.cmd_params.clear(); - command.cmd_type = PROFILING_INIT; - command.module_index = kProfModelLoad | kProfTrainingTrace; + command.cmd_type = 
kProfilingInit; + command.module_index = PROF_MODEL_LOAD; ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { - GELOGE(ret, "Handle profiling command %s failed, config = %s", PROFILING_INIT.c_str(), profiler_path); + GELOGE(ret, "Handle profiling command %s failed, config = %s", kProfilingInit.c_str(), profiler_path); return ret; } if (!g_graph_prof_init_) { @@ -143,10 +126,10 @@ Status aclgrphProfFinalize() { GraphLoader graph_loader; Command command; command.cmd_params.clear(); - command.cmd_type = PROFILING_FINALIZE; + command.cmd_type = kProfilingFinalize; Status ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { - GELOGE(ret, "Handle profiling command %s failed.", PROFILING_FINALIZE.c_str()); + GELOGE(ret, "Handle profiling command %s failed.", kProfilingFinalize.c_str()); return ret; } @@ -164,9 +147,9 @@ Status aclgrphProfFinalize() { bool TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector &prof_config_params) { prof_config_params.clear(); - prof_config_params.emplace_back(DEVICES_NUMS); + prof_config_params.emplace_back(kDeviceNums); prof_config_params.emplace_back(std::to_string(profiler_config->config.devNums)); - prof_config_params.emplace_back(DEVICE_ID_LIST); + prof_config_params.emplace_back(kDeviceIdList); std::string devID = ""; if (profiler_config->config.devNums == 0) { GELOGW("The device num is invalid."); @@ -180,7 +163,7 @@ bool TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector(profiler_config->config.aicoreMetrics)); if (iter == kProfAicoreMetricsToString.end()) { @@ -250,13 +233,7 @@ aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t dev } config->config.aicoreMetrics = static_cast(aicore_metrics); - uint64_t data_type = 0; - for (auto &iter : kDataTypeConfigMapping) { - if ((iter.first & data_type_config) == iter.first) { - data_type |= iter.second; - } - } - config->config.dataTypeConfig = data_type; + config->config.dataTypeConfig = 
data_type_config; GELOGI("Successfully create prof config."); return config; } @@ -309,9 +286,11 @@ Status aclgrphProfStart(aclgrphProfConfig *profiler_config) { GraphLoader graph_loader; Command command; command.cmd_params.clear(); - command.cmd_type = PROFILING_START; + command.cmd_type = kProfilingStart; command.cmd_params = prof_params; command.module_index = profiler_config->config.dataTypeConfig; + GELOGI("Profiling will start, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), + prof_params[kDeviceListIndex].c_str(), command.module_index); ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { GELOGE(ret, "Handle profiling command failed"); @@ -360,9 +339,11 @@ Status aclgrphProfStop(aclgrphProfConfig *profiler_config) { GraphLoader graph_loader; Command command; command.cmd_params.clear(); - command.cmd_type = PROFILING_STOP; + command.cmd_type = kProfilingStop; command.cmd_params = prof_params; command.module_index = profiler_config->config.dataTypeConfig; + GELOGI("Profiling will stop, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), + prof_params[kDeviceListIndex].c_str(), command.module_index); ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { GELOGE(ret, "Handle profiling command failed"); diff --git a/src/ge/common/dump/dump_server.cc b/src/ge/common/dump/dump_server.cc new file mode 100644 index 00000000..1f95dc3a --- /dev/null +++ b/src/ge/common/dump/dump_server.cc @@ -0,0 +1,21 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "adx_datadump_server.h" + +int AdxDataDumpServerUnInit() { return 0; } + +int AdxDataDumpServerInit() { return 0; } diff --git a/src/ge/common/profiling/profiling_manager.cc b/src/ge/common/profiling/profiling_manager.cc index d301f647..d02f7e8f 100644 --- a/src/ge/common/profiling/profiling_manager.cc +++ b/src/ge/common/profiling/profiling_manager.cc @@ -55,19 +55,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In bool convert_2_phy_device_id) { #ifdef DAVINCI_SUPPORT_PROFILING vector().swap(device_id_); - // profiling need phy device id - if (!convert_2_phy_device_id) { - device_id_.push_back(options.device_id); - } else { - uint32_t phy_device_id = 0; - rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast(options.device_id), &phy_device_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); - return FAILED; - } - device_id_.push_back(phy_device_id); - } - job_id_ = options.job_id; Status ret; @@ -76,6 +63,20 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In ret = InitFromAclCfg(recv_profiling_config_); } else { ret = InitFromOptions(options); + if (ret == SUCCESS && is_load_profiling_) { + // profiling need phy device id + if (!convert_2_phy_device_id) { + device_id_.push_back(options.device_id); + } else { + uint32_t phy_device_id = 0; + rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast(options.device_id), &phy_device_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, 
"runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); + return FAILED; + } + device_id_.push_back(phy_device_id); + } + } } if (ret != SUCCESS) { GELOGE(ret, "Failed to init profiling."); @@ -868,14 +869,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin } GELOGI("Current logic_device_id:%d", logic_device_id); - uint32_t phy_device_id = 0; - rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); - } - GELOGI("Current phy_device_id:%d", phy_device_id); bool execute_model_prof_on = false; - auto iter = std::find(device_id_.begin(), device_id_.end(), phy_device_id); + auto iter = std::find(device_id_.begin(), device_id_.end(), logic_device_id); if (iter != device_id_.end()) { execute_model_prof_on = true; } diff --git a/src/ge/common/util.cc b/src/ge/common/util.cc index cbd2ee71..ce5aa57e 100644 --- a/src/ge/common/util.cc +++ b/src/ge/common/util.cc @@ -58,7 +58,7 @@ const int kWarningThreshold = 536870912 * 2; // 536870912 represent 512M const int kMaxFileSizeLimit = INT_MAX; const int kMaxBuffSize = 256; const char *const kPathValidReason = "The path can only contain 'a-z' 'A-Z' '0-9' '-' '.' 
'_' and chinese character"; -constexpr uint32_t MAX_CONFIG_FILE_BYTE = 10 * 1024 * 1024; +constexpr uint32_t kMaxConfigFileByte = 10 * 1024 * 1024; } // namespace namespace ge { @@ -512,9 +512,9 @@ FMK_FUNC_HOST_VISIBILITY bool IsValidFile(const char *file_path) { stat.st_mode); return false; } - if (stat.st_size > MAX_CONFIG_FILE_BYTE) { + if (stat.st_size > kMaxConfigFileByte) { GELOGE(PARAM_INVALID, "config file %s size[%ld] is larger than max config file Bytes[%u]", - resolved_file_path.c_str(), stat.st_size, MAX_CONFIG_FILE_BYTE); + resolved_file_path.c_str(), stat.st_size, kMaxConfigFileByte); return false; } return true; diff --git a/src/ge/executor/ge_executor.cc b/src/ge/executor/ge_executor.cc index bf1e250b..0a247142 100644 --- a/src/ge/executor/ge_executor.cc +++ b/src/ge/executor/ge_executor.cc @@ -745,6 +745,22 @@ Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo GELOGI("GetAIPPInfo succ."); return SUCCESS; } + +Status GeExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { + GELOGI("Begin to get aipp type."); + if (!isInit_) { + GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + return GE_EXEC_NOT_INIT; + } + Status ret = GraphExecutor::GetAippType(model_id, index, type, aipp_index); + if (ret != SUCCESS) { + GELOGW("Get aipp type is not success."); + return ret; + } + GELOGI("Get aipp type success."); + return SUCCESS; +} + Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info) { GELOGI("Begin to get dynamic batch output shape info"); if (!isInit_) { diff --git a/src/ge/ge_inference.mk b/src/ge/ge_inference.mk index f83e590a..232e79ec 100644 --- a/src/ge/ge_inference.mk +++ b/src/ge/ge_inference.mk @@ -29,6 +29,7 @@ COMMON_LOCAL_SRC_FILES := \ common/dump/dump_properties.cc \ common/dump/dump_manager.cc \ common/dump/dump_op.cc \ + common/dump/dump_server.cc \ common/helper/model_cache_helper.cc \ ge_local_engine/engine/host_cpu_engine.cc \ 
@@ -371,7 +372,6 @@ LOCAL_SRC_FILES += $(BUILER_SRC_FILES) LOCAL_SRC_FILES += $(ANALYZER_SRC_FILES) LOCAL_STATIC_LIBRARIES := libge_memory \ - libadump_server_stub \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ @@ -436,7 +436,6 @@ LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES) LOCAL_C_INCLUDES += $(ANALYZER_LOCAL_INCLUDES) LOCAL_STATIC_LIBRARIES := libge_memory \ - libadump_server_stub \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ diff --git a/src/ge/ge_runner.mk b/src/ge/ge_runner.mk index 7a65787c..04182070 100644 --- a/src/ge/ge_runner.mk +++ b/src/ge/ge_runner.mk @@ -1,5 +1,5 @@ LOCAL_PATH := $(call my-dir) - +include $(LOCAL_PATH)/stub/Makefile LIBGE_LOCAL_SRC_FILES := \ proto/fusion_model.proto \ proto/optimizer_priority.proto \ @@ -392,8 +392,8 @@ endif LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) -LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc -LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_prof.cc +LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc \ + ../../out/ge/lib64/stub/ge_prof.cc \ LOCAL_SHARED_LIBRARIES := diff --git a/src/ge/graph/build/memory/block_mem_assigner.cc b/src/ge/graph/build/memory/block_mem_assigner.cc index 746f73c2..773eac6a 100644 --- a/src/ge/graph/build/memory/block_mem_assigner.cc +++ b/src/ge/graph/build/memory/block_mem_assigner.cc @@ -413,7 +413,8 @@ BlockMemAssigner::BlockMemAssigner(ComputeGraphPtr compute_graph, const map &reusable_block_counts, const Me } bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, - uint32_t &peer_input_index) { + uint32_t &peer_input_index, bool &no_need_assign_memory) { if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { return false; } @@ -571,6 +572,11 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou // If GetBool fail, is_input_continuous is false. 
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + + GE_IF_BOOL_EXEC(is_input_continuous && CheckIsZeroMemNodeType(peer_node->GetType()), + GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index); + no_need_assign_memory = true; return false;); + if (is_input_continuous) { if (n->GetOwnerComputeGraph() != nullptr) { string graph_name = n->GetOwnerComputeGraph()->GetName(); @@ -828,6 +834,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, } } memory_blocks_.emplace_back(block); + blocks_store_.emplace_back(block); return block; } @@ -1143,8 +1150,10 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector bool out_node_set_continuous_input = false; bool no_need_assign_memory = ((size == 0) || CheckIsZeroMemNodeType(node->GetType())); if (!no_need_assign_memory) { - out_node_set_continuous_input = IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index); - no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input); + out_node_set_continuous_input = + IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index, no_need_assign_memory); + GE_IF_BOOL_EXEC(!no_need_assign_memory, + no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input);); } no_need_assign_memory = (no_need_assign_memory || IsKnownSubgraphData(node)); if (no_need_assign_memory) { diff --git a/src/ge/graph/build/memory/block_mem_assigner.h b/src/ge/graph/build/memory/block_mem_assigner.h index 7e37fe8e..6137911c 100644 --- a/src/ge/graph/build/memory/block_mem_assigner.h +++ b/src/ge/graph/build/memory/block_mem_assigner.h @@ -259,6 +259,7 @@ class BlockMemAssigner : public MemAssigner { ge::ComputeGraphPtr compute_graph_; std::vector memory_blocks_; + std::vector blocks_store_; std::vector zero_memory_list_; @@ -357,7 +358,7 @@ class BlockMemAssigner : public MemAssigner { bool 
IsZeroCopyBlock(const NodePtr &node, bool continuous); bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, - uint32_t &peer_input_index); + uint32_t &peer_input_index, bool &no_need_assign_memory); /// /// @ingroup GE diff --git a/src/ge/graph/build/memory/graph_mem_assigner.cc b/src/ge/graph/build/memory/graph_mem_assigner.cc index 583f65d8..1518714f 100644 --- a/src/ge/graph/build/memory/graph_mem_assigner.cc +++ b/src/ge/graph/build/memory/graph_mem_assigner.cc @@ -39,6 +39,33 @@ const size_t kVirtualInputNodeOutputSize = 1; const size_t kVirtualOutputNodeInputSize = 1; const size_t kVirtualNodeDataIndex = 0; const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_"; +int64_t GetSymbolOutputOffset(const std::map &anchor_to_symbol, + const std::map> &symbol_to_anchors, + const ge::NodePtr &node, const uint32_t i) { + ge::NodeIndexIO cur_node_index_io(node, i, ge::kOut); + auto iter1 = anchor_to_symbol.find(cur_node_index_io.ToString()); + if (iter1 == anchor_to_symbol.end()) { + return ge::kInvalidOffset; + } + auto out_symbol = iter1->second; + auto iter2 = symbol_to_anchors.find(out_symbol); + if (iter2 == symbol_to_anchors.end()) { + return ge::kInvalidOffset; + } + for (const auto &node_index_io : iter2->second) { + if (node_index_io.value_ == out_symbol) { + vector output_list = node->GetOpDesc()->GetOutputOffset(); + vector symbol_output_list = node_index_io.node_->GetOpDesc()->GetOutputOffset(); + if (node_index_io.index_ >= symbol_output_list.size()) { + return ge::kInvalidOffset; + } + GELOGD("Node %s %uth output offset is %ld, Symbol %s output offset is %ld.", node->GetName().c_str(), i, + output_list[i], iter2->first.c_str(), symbol_output_list.at(node_index_io.index_)); + return symbol_output_list.at(node_index_io.index_); + } + } + return ge::kInvalidOffset; +} } // namespace namespace ge { Status VariableMemoryAssigner::Assign() { @@ -1191,6 +1218,12 @@ Status 
GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt } Status GraphMemoryAssigner::CheckOffset() { + std::map anchor_to_symbol; + std::map> symbol_to_anchors; + if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Get ref-mapping for graph %s failed.", compute_graph_->GetName().c_str()); + return FAILED; + } for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { GE_CHECK_NOTNULL(node->GetOpDesc()); vector input_list = node->GetOpDesc()->GetInputOffset(); @@ -1200,13 +1233,26 @@ Status GraphMemoryAssigner::CheckOffset() { return FAILED; } } + + bool need_update_output = false; vector output_list = node->GetOpDesc()->GetOutputOffset(); - for (auto output : output_list) { - if (output == ge::kInvalidOffset) { + for (uint32_t i = 0; i < output_list.size(); ++i) { + if (output_list[i] == ge::kInvalidOffset) { GELOGE(FAILED, "Invalid offset in node: %s output: %ld.", node->GetName().c_str(), ge::kInvalidOffset); return FAILED; } + if (node->GetType() == IDENTITY || node->GetType() == READVARIABLEOP) { + auto symbol_offset = GetSymbolOutputOffset(anchor_to_symbol, symbol_to_anchors, node, i); + if (symbol_offset != ge::kInvalidOffset && output_list[i] != symbol_offset) { + output_list[i] = symbol_offset; + need_update_output = true; + } + } } + if (need_update_output) { + node->GetOpDesc()->SetOutputOffset(output_list); + } + vector workspace_list = node->GetOpDesc()->GetWorkspace(); for (auto workspace : workspace_list) { if (workspace == ge::kInvalidOffset) { diff --git a/src/ge/graph/execute/graph_execute.cc b/src/ge/graph/execute/graph_execute.cc index 25208aa4..e1322180 100644 --- a/src/ge/graph/execute/graph_execute.cc +++ b/src/ge/graph/execute/graph_execute.cc @@ -592,7 +592,17 @@ Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigI GELOGW("GetAIPPInfo is not success."); return ret; } + return SUCCESS; +} +Status 
GraphExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { + auto model_manager = ge::ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + Status ret = model_manager->GetAippType(model_id, index, type, aipp_index); + if (ret != SUCCESS) { + GELOGW("Get aipp type is not success."); + return ret; + } return SUCCESS; } diff --git a/src/ge/graph/execute/graph_execute.h b/src/ge/graph/execute/graph_execute.h index 5cf39bae..242103f8 100644 --- a/src/ge/graph/execute/graph_execute.h +++ b/src/ge/graph/execute/graph_execute.h @@ -75,6 +75,8 @@ class GraphExecutor { static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + static Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); + /// /// @ingroup ge /// @brief Get dynamic batch_info diff --git a/src/ge/graph/load/new_model_manager/davinci_model.cc b/src/ge/graph/load/new_model_manager/davinci_model.cc index 3c2aaffa..81eb4bc9 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.cc +++ b/src/ge/graph/load/new_model_manager/davinci_model.cc @@ -125,7 +125,7 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptrHasAttr(ATTR_DATA_RELATED_AIPP_MODE)) { + GELOGW("There is no aipp releated info with index %u.", index); + return SUCCESS; + } + std::string data_mode; + (void)AttrUtils::GetStr(data_op, ATTR_DATA_RELATED_AIPP_MODE, data_mode); + if (data_mode == "static_aipp") { + type = DATA_WITH_STATIC_AIPP; + } else if (data_mode == "dynamic_aipp") { + type = DATA_WITH_DYNAMIC_AIPP; + } else if (data_mode == "dynamic_aipp_conf") { + type = DYNAMIC_AIPP_NODE; + } else { + GELOGE(INTERNAL_ERROR, "The info of aipp releated info %s is invalid with index %u.", data_mode.c_str(), index); + return INTERNAL_ERROR; + } + + if (type == DATA_WITH_DYNAMIC_AIPP) { + string releated_name; + (void)AttrUtils::GetStr(data_op, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name); + for (size_t i 
= 0; i < data_op_list_.size(); ++i) { + GE_CHECK_NOTNULL(data_op_list_[i]); + if (data_op_list_[i]->GetName() == releated_name) { + GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), i, index); + aipp_index = i; + } + } + if (aipp_index == 0xFFFFFFFF) { + GELOGE(INTERNAL_ERROR, "Can not find aipp data node from index %u", index); + return INTERNAL_ERROR; + } + } + return SUCCESS; +} + void DavinciModel::SetDynamicSize(const std::vector &batch_num, int32_t dynamic_type) { batch_size_.clear(); if (batch_num.empty()) { @@ -1666,9 +1708,9 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, return; } // judge if this data is linked dynamic aipp first, multiply batch has been considered - if (op_desc->HasAttr("_dynamic_aipp_input_dims")) { + if (op_desc->HasAttr(ATTR_DYNAMIC_AIPP_INPUT_DIMS)) { vector dynamic_aipp_input_dims; - (void)AttrUtils::GetListInt(op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_input_dims); + (void)AttrUtils::GetListInt(op_desc, ATTR_DYNAMIC_AIPP_INPUT_DIMS, dynamic_aipp_input_dims); SetInputDimsInfo(dynamic_aipp_input_dims, format, input); return; } else { @@ -3371,11 +3413,15 @@ bool DavinciModel::IsBroadCastOpData(const ge::NodePtr &var_node) { /// @return Status /// Status DavinciModel::InitModelStream(rtStream_t stream) { + ExecuteMode curr_mode = is_async_mode_ ? ASYNCHRONIZATION : SYNCHRONIZATION; + GE_CHK_BOOL_RET_STATUS((curr_mode == last_execute_mode_) || (last_execute_mode_ == INITIALIZATION), INTERNAL_ERROR, + "NnExecute not support mix execute."); + last_execute_mode_ = curr_mode; + // asynchronize mode, use user input stream. 
if (is_async_mode_) { rt_model_stream_ = stream; is_inner_model_stream_ = false; - last_execute_mode_ = true; return SUCCESS; } @@ -3387,14 +3433,12 @@ Status DavinciModel::InitModelStream(rtStream_t stream) { rt_model_stream_ = stream; is_inner_model_stream_ = false; - last_execute_mode_ = false; return SUCCESS; } - if (last_execute_mode_ || (rt_model_stream_ == nullptr)) { + if (rt_model_stream_ == nullptr) { GE_CHK_RT_RET(rtStreamCreateWithFlags(&rt_model_stream_, priority_, RT_STREAM_FORBIDDEN_DEFAULT)); is_inner_model_stream_ = true; - last_execute_mode_ = false; } return SUCCESS; diff --git a/src/ge/graph/load/new_model_manager/davinci_model.h b/src/ge/graph/load/new_model_manager/davinci_model.h index 15f4539f..438fe639 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.h +++ b/src/ge/graph/load/new_model_manager/davinci_model.h @@ -75,6 +75,12 @@ struct timeInfo { int64_t dumpEndTime; }; +enum ExecuteMode { + INITIALIZATION, + SYNCHRONIZATION, + ASYNCHRONIZATION, +}; + // comments class DavinciModel { public: @@ -314,6 +320,8 @@ class DavinciModel { /// Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info); + Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index); + /// /// @ingroup ge /// @brief Get model_id. @@ -884,7 +892,7 @@ class DavinciModel { bool is_inner_model_stream_; bool is_async_mode_; // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_. 
- bool last_execute_mode_; + ExecuteMode last_execute_mode_; bool is_stream_list_bind_{false}; bool is_pure_head_stream_{false}; diff --git a/src/ge/graph/load/new_model_manager/model_manager.cc b/src/ge/graph/load/new_model_manager/model_manager.cc index 320bfb16..f6995052 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.cc +++ b/src/ge/graph/load/new_model_manager/model_manager.cc @@ -876,6 +876,14 @@ Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippCo return davinci_model->GetAIPPInfo(index, aipp_info); } +Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { + std::shared_ptr davinci_model = GetModel(model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetAIPPInfo failed, invalid model_id is %u.", + model_id); + + return davinci_model->GetAippType(index, type, aipp_index); +} + Status ModelManager::GenSessionId(uint64_t &session_id) { std::lock_guard lock(session_id_create_mutex_); diff --git a/src/ge/graph/load/new_model_manager/model_manager.h b/src/ge/graph/load/new_model_manager/model_manager.h index e89bfc36..3dce3807 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.h +++ b/src/ge/graph/load/new_model_manager/model_manager.h @@ -224,6 +224,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); + /// /// @ingroup domi_ome /// @brief set model input and output size zero copy diff --git a/src/ge/graph/manager/graph_manager.cc b/src/ge/graph/manager/graph_manager.cc index 82108653..39bdee36 100644 --- a/src/ge/graph/manager/graph_manager.cc +++ b/src/ge/graph/manager/graph_manager.cc @@ -2795,11 +2795,18 @@ Status GraphManager::SaveVariables(const Graph &graph, const std::vectorfirst; + auto var_tensor = 
iter->second.GetTensorDesc(); + var_tensor.SetName(var_name); + iter->second.SetTensorDesc(var_tensor); var_values.emplace_back(iter->second); } } diff --git a/src/ge/graph/optimize/mem_rw_conflict_optimize.cc b/src/ge/graph/optimize/mem_rw_conflict_optimize.cc index 9c166f4d..3c3419ae 100644 --- a/src/ge/graph/optimize/mem_rw_conflict_optimize.cc +++ b/src/ge/graph/optimize/mem_rw_conflict_optimize.cc @@ -491,7 +491,7 @@ Status SplitIdentityAlongAnchor(const OutDataAnchorPtr &out_data_anchor, const I if (input_rw_type == InputRWType::kScopeWriteable || input_rw_type == InputRWType::kWriteable) { auto new_identity = CreateIdentityAfterSrcNode(*pre_node, pre_out_data_anchor->GetIdx()); GE_CHECK_NOTNULL(new_identity); - if (GraphUtils::AddEdge(pre_out_data_anchor, new_identity->GetInDataAnchor(kIdentityAnchorIndex)) != SUCCESS && + if (GraphUtils::AddEdge(pre_out_data_anchor, new_identity->GetInDataAnchor(kIdentityAnchorIndex)) != SUCCESS || GraphUtils::AddEdge(new_identity->GetOutDataAnchor(kIdentityAnchorIndex), peer_in_data_anchor) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to insert Identity between node %s and %s", pre_out_data_anchor->GetOwnerNode()->GetName().c_str(), diff --git a/src/ge/graph/passes/subgraph_pass.cc b/src/ge/graph/passes/subgraph_pass.cc index fbf444fb..fd71e65b 100644 --- a/src/ge/graph/passes/subgraph_pass.cc +++ b/src/ge/graph/passes/subgraph_pass.cc @@ -176,6 +176,9 @@ Status SubgraphPass::WhileInputNodes(const ComputeGraphPtr &graph, const NodePtr GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); NodePtr in_node = peer_out_anchor->GetOwnerNode(); GE_CHECK_NOTNULL(in_node); + if (in_node->GetType() == VARIABLE || in_node->GetType() == VARHANDLEOP || in_node->GetType() == VARIABLEV2) { + continue; + } // Input->While and Input link to other nodes need insert memcpy if (peer_out_anchor->GetPeerInDataAnchors().size() > 1) { GELOGD("Input %s of While %s links to other nodes.", in_node->GetName().c_str(), node->GetName().c_str()); diff 
--git a/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc index eb936282..545fe66f 100644 --- a/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -124,7 +124,14 @@ Status GetDataDimN(const ge::NodePtr &data_node, ge::Format format, int64_t &bat return PARAM_INVALID; } } - GELOGE(PARAM_INVALID, "when dynamic aipp, shape must be in range [3, 4], but is %zu", shape.size()); + string errormsg = + "its shape size must be in range[3,4] which dynamic aipp is linked, " + "maybe this input is not suitable for dynamic aipp"; + ErrorManager::GetInstance().ATCReportErrMessage( + "E10001", {"parameter", "value", "reason"}, + {data_node->GetName() + " shape size", to_string(shape.size()), errormsg}); + GELOGE(PARAM_INVALID, "The shape size of this node [%s] which linked dynamic aipp must be in range[3, 4], but is %zu", + data_node->GetName().c_str(), shape.size()); return PARAM_INVALID; } @@ -272,7 +279,6 @@ Status AippOp::AddAippAttrbutes(const OpDescPtr &op_desc, const std::string &aip GE_CHK_BOOL_RET_STATUS(AttrUtils::SetInt(op_desc, kCurrentAippIndex, index), INTERNAL_ERROR, "Set kCurrentAippIndex attr for aipp node failed"); - // add input/output desc GeTensorDesc tensor; GE_CHK_GRAPH_STATUS_RET(op_desc->AddInputDesc("images", tensor), "Failed to add input images for aipp node"); @@ -318,6 +324,7 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr GELOGE(PARAM_INVALID, "Get target input node for rank %d failed", rank); return PARAM_INVALID; } + data_node_linked_aipp = data_node; auto data_opdesc = data_node->GetOpDesc(); GE_CHECK_NOTNULL(data_opdesc); string set_dt_str; @@ -330,10 +337,17 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr return PARAM_INVALID; } + // add dynamic or static attr memsage to data + if (GetAippMode() == domi::AippOpParams::static_) { + (void)AttrUtils::SetStr(data_opdesc, 
ATTR_DATA_RELATED_AIPP_MODE, "static_aipp"); + } else if (GetAippMode() == domi::AippOpParams::dynamic) { + (void)AttrUtils::SetStr(data_opdesc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp"); + } + // In scenario AIPP+CONV2D+POOLING, keep the aipp info to Data, since AIPP disappear after subgraph optimize GeAttrValue::NAMED_ATTRS aipp_attr; ConvertParamToAttr(aipp_attr); - if (!AttrUtils::SetNamedAttrs(data_node->GetOpDesc(), ATTR_NAME_AIPP, aipp_attr)) { + if (!AttrUtils::SetNamedAttrs(data_opdesc, ATTR_NAME_AIPP, aipp_attr)) { GELOGE(INTERNAL_ERROR, "Set name attrs for Data node failed. id: %d", rank); return INTERNAL_ERROR; } @@ -737,7 +751,7 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { data_shape_n = data_op_desc->MutableInputDesc(0)->GetShape().GetDim(0); } vector dynamic_aipp_linked_data_shape{data_shape_n, kDynamicDim, kDynamicDim, kDynamicDim}; - (void)AttrUtils::SetListInt(data_op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_linked_data_shape); + (void)AttrUtils::SetListInt(data_op_desc, ATTR_DYNAMIC_AIPP_INPUT_DIMS, dynamic_aipp_linked_data_shape); int64_t batch_count = -1; if (GetDataDimN(data_node, ori_data_format, batch_count) != ge::SUCCESS) { @@ -759,7 +773,24 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { return AddNodeToGraph(aipp_node, max_dynamic_aipp_size); } +Status AippOp::AddAttrToAippData(const OpDescPtr &aipp_data_op_desc) { + // Add dynamic aipp config to aipp_data + GeAttrValue::NAMED_ATTRS aipp_attr; + ConvertParamToAttr(aipp_attr); + (void)AttrUtils::SetNamedAttrs(aipp_data_op_desc, ATTR_NAME_AIPP, aipp_attr); + (void)AttrUtils::SetStr(aipp_data_op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp_conf"); + + // add node name attr to data linked aipp_data, it can be queried by acl. 
+ GE_CHECK_NOTNULL(data_node_linked_aipp); + auto data_op_desc = data_node_linked_aipp->GetOpDesc(); + GE_CHECK_NOTNULL(data_op_desc); + (void)AttrUtils::SetStr(data_op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, aipp_data_op_desc->GetName()); + (void)AttrUtils::SetStr(aipp_data_op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, data_op_desc->GetName()); + return SUCCESS; +} + Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size) { + static int index = 0; std::vector input_shape_dim(1, max_dynamic_aipp_size); GeShape input_shape(input_shape_dim); // construct input tensor @@ -767,18 +798,21 @@ Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp TensorUtils::SetReuseInput(input_tensor, false); TensorUtils::SetSize(input_tensor, max_dynamic_aipp_size); - // Only flush subgraph name const ComputeGraphPtr &graph = aipp_node->GetOwnerComputeGraph(); - string node_name = (graph->GetParentGraph() == nullptr) ? kDynamicAippData : (graph->GetName() + "_" + node_name); - + string node_name; + if (index == 0) { + node_name = kDynamicAippData; + } else { + node_name = string(kDynamicAippData) + "_" + to_string(index); + } + ++index; // new add aipp_data ops for dynamic aipp param input OpDescPtr op_desc_ptr_data = MakeShared(node_name, AIPPDATA); GE_CHECK_NOTNULL(op_desc_ptr_data); - // Add dynamic aipp config to aipp_data - GeAttrValue::NAMED_ATTRS aipp_attr; - ConvertParamToAttr(aipp_attr); - (void)AttrUtils::SetNamedAttrs(op_desc_ptr_data, ATTR_NAME_AIPP, aipp_attr); + if (AddAttrToAippData(op_desc_ptr_data) != SUCCESS) { + return INTERNAL_ERROR; + } auto stat1 = op_desc_ptr_data->AddInputDesc(input_tensor); diff --git a/src/ge/graph/preprocess/insert_op/ge_aipp_op.h b/src/ge/graph/preprocess/insert_op/ge_aipp_op.h index c98935ee..64c89b62 100644 --- a/src/ge/graph/preprocess/insert_op/ge_aipp_op.h +++ b/src/ge/graph/preprocess/insert_op/ge_aipp_op.h @@ -78,9 +78,11 @@ class AippOp : public InsertOpBase { Status CreateAippData(const 
NodePtr &aipp); Status AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size); Status AddAippAttrbutes(const OpDescPtr &op_desc, const std::string &aipp_cfg_path, const uint32_t &index); + Status AddAttrToAippData(const OpDescPtr &aipp_data_op_desc); domi::AippOpParams *aipp_params_ = nullptr; ge::NodePtr aipp_node_ = nullptr; + ge::NodePtr data_node_linked_aipp = nullptr; }; } // namespace ge diff --git a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index c55be013..83a16e75 100644 --- a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -22,6 +22,7 @@ #include "common/ge/ge_util.h" #include "common/op/ge_op_utils.h" #include "common/util.h" +#include "common/util/error_manager/error_manager.h" #include "framework/common/debug/ge_log.h" #include "framework/common/debug/log.h" #include "framework/common/ge_inner_error_codes.h" @@ -120,15 +121,15 @@ Status InsertNewOpUtil::CheckPositionNotRepeat() { for (int j = i + 1; j < insert_op_conf_->aipp_op_size(); j++) { const domi::AippOpParams *another_item = insert_op_conf_->mutable_aipp_op(j); - - GE_IF_BOOL_EXEC(item->related_input_rank() != another_item->related_input_rank(), continue;); - - GE_IF_BOOL_EXEC( - item->input_edge_idx_size() == 0 || another_item->input_edge_idx_size() == 0 || - item->input_edge_idx(0) == another_item->input_edge_idx(0), - GELOGE(PARAM_INVALID, - "Can not insert aipp op to the same postion! please check related_input_rank and input_edge_idx."); - return PARAM_INVALID;); + GE_IF_BOOL_EXEC(item->related_input_rank() == another_item->related_input_rank(), + string errormsg = + "Can not insert aipp to the same postion! 
Please ensure related_input_rank" + " param is different in different aipp config."; + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); + GELOGE(PARAM_INVALID, + "Can not insert aipp op to the same postion! Please ensure related_input_rank param " + "is different in different aipp config."); + return PARAM_INVALID;); } } @@ -162,18 +163,12 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { std::unique_ptr aippParams(new (std::nothrow) domi::AippOpParams()); GE_CHECK_NOTNULL(aippParams); - GE_IF_BOOL_EXEC(aippNodes.size() > 0, GE_CHK_STATUS(GetAippParams(aippParams, aippNodes[0])); - aippMode = (aippMode == domi::AippOpParams::undefined) ? aippParams->aipp_mode() : aippMode; - GE_CHK_BOOL_RET_STATUS(aippMode == aippParams->aipp_mode(), PARAM_INVALID, - "The aipp_mode of all aipp_op must be the same");); GE_IF_BOOL_EXEC( aippNodes.size() > 1, for (decltype(aippNodes)::size_type i = 1; i < aippNodes.size(); i++) { std::unique_ptr currAippParam(new (std::nothrow) domi::AippOpParams()); GE_CHECK_NOTNULL(currAippParam); GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i])); - GE_CHK_BOOL_RET_STATUS(aippMode == currAippParam->aipp_mode(), PARAM_INVALID, - "The aipp_mode of all aipp_op must be the same"); if (aippMode == domi::AippOpParams::static_) { GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, "The input_format of all aipp_ops after one Data should be the same"); diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.cc b/src/ge/graph/preprocess/multi_batch_copy_graph.cc index 298e7749..331d9c31 100644 --- a/src/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/src/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -41,6 +41,7 @@ #include "inc/pass_manager.h" #include "graph/common/local_context.h" +using std::map; using std::set; using std::string; using std::vector; @@ -265,27 +266,24 @@ Status MultiBatchGraphCopyer::Init() { } Status 
MultiBatchGraphCopyer::LabelStatus() { - for (const auto &data : origin_data_nodes_) { - auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); - if (!IsAllDimsPositive(data_shape.GetDims())) { - origin_nodes_status_[data.get()] = kNodeInBatchBranch; - } - } + map> frame_enters; + InitStatus(frame_enters); + bool changed = true; // If anyone of in node is kNodeInBatchBranch, it is also kNodeInBatchBranch while (changed) { changed = false; for (const auto &node : origin_all_nodes_) { - auto iter = origin_nodes_status_.find(node.get()); - if (iter != origin_nodes_status_.end()) { - continue; - } for (auto &in_node : node->GetInAllNodes()) { bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() && origin_nodes_status_[in_node.get()] == kNodeInBatchBranch; if (is_in_batch) { - origin_nodes_status_[node.get()] = kNodeInBatchBranch; - changed = true; + if (origin_nodes_status_.find(node.get()) == origin_nodes_status_.end() || + origin_nodes_status_[node.get()] != kNodeInBatchBranch) { + origin_nodes_status_[node.get()] = kNodeInBatchBranch; + ResetEnterStatus(frame_enters, node); + changed = true; + } break; } } @@ -316,6 +314,45 @@ Status MultiBatchGraphCopyer::LabelStatus() { return SUCCESS; } +void MultiBatchGraphCopyer::InitStatus(map> &frame_enters) { + for (const auto &node : origin_all_nodes_) { + if (node->GetType() != ENTER && node->GetType() != REFENTER) { + continue; + } + auto op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + continue; + } + string frame_name; + if (AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) { + frame_enters[frame_name].emplace_back(node); + } + } + + for (const auto &data : origin_data_nodes_) { + auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); + if (!IsAllDimsPositive(data_shape.GetDims())) { + origin_nodes_status_[data.get()] = kNodeInBatchBranch; + } + } +} + +void MultiBatchGraphCopyer::ResetEnterStatus(map> &frame_enters, 
const NodePtr &node) { + if (node->GetType() != ENTER && node->GetType() != REFENTER) { + return; + } + + for (const auto &frame_enter : frame_enters) { + auto &enters = frame_enter.second; + if (std::find(enters.begin(), enters.end(), node) != enters.end()) { + for (const auto &enter : enters) { + origin_nodes_status_[enter.get()] = kNodeInBatchBranch; + } + break; + } + } +} + Status MultiBatchGraphCopyer::CreateNewNodes() { shape_data_ = InsertShapeDataNode(); if (shape_data_ == nullptr) { diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.h b/src/ge/graph/preprocess/multi_batch_copy_graph.h index 062b98d2..f665b65e 100644 --- a/src/ge/graph/preprocess/multi_batch_copy_graph.h +++ b/src/ge/graph/preprocess/multi_batch_copy_graph.h @@ -68,6 +68,8 @@ class MultiBatchGraphCopyer { // label status for origin_all_nodes_ Status LabelStatus(); + void InitStatus(std::map> &frame_enters); + void ResetEnterStatus(std::map> &frame_enters, const NodePtr &node); // add nodes functions Status CreateNewNodes(); diff --git a/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 44fe377a..871f1db4 100644 --- a/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -722,8 +722,15 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node GE_CHECK_NOTNULL(node_item); auto task_defs = model.GetTaskDefs(node); GE_CHECK_NOTNULL(task_defs); - GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID, "Node[%s] task_def num[%zu] != 1", - node->GetName().c_str(), (*task_defs).size()); + if (node_item->shape_inference_type != DEPEND_COMPUTE) { + GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID, "Node[%s] task_def num[%zu] != 1", + node->GetName().c_str(), (*task_defs).size()); + } else { + // The number of tasks of the fourth type operator may be 2 + GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1 || 
(*task_defs).size() == 2, PARAM_INVALID, + "Node[%s] DEPEND_COMPUTE task_def num[%zu] != 1 or 2", node->GetName().c_str(), + (*task_defs).size()); + } const auto &task_def = (*task_defs)[0]; std::shared_ptr aicpu_task; if (task_def.type() == RT_MODEL_TASK_KERNEL_EX) { diff --git a/src/ge/session/inner_session.cc b/src/ge/session/inner_session.cc index 3d3adfd8..44c29460 100644 --- a/src/ge/session/inner_session.cc +++ b/src/ge/session/inner_session.cc @@ -18,6 +18,7 @@ #include #include #include +#include "adx_datadump_server.h" #include "common/dump/dump_properties.h" #include "common/util.h" #include "framework/common/debug/ge_log.h" @@ -76,10 +77,12 @@ Status InnerSession::Initialize() { DumpProperties dump_properties; dump_properties.InitByOptions(); + GE_CHK_STATUS_RET(AddDumpProperties(dump_properties), "Add dump properties failed"); ret = graph_manager_.Initialize(options_); if (ret != SUCCESS) { GELOGE(ret, "[InnerSession:%lu] initialize failed.", session_id_); + GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); return ret; } @@ -87,6 +90,7 @@ Status InnerSession::Initialize() { if (ret != SUCCESS) { GELOGE(ret, "failed to set malloc size"); (void)graph_manager_.Finalize(); + GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); return ret; } @@ -97,6 +101,7 @@ Status InnerSession::Initialize() { ret = VarManager::Instance(session_id_)->Init(version, session_id_, DEFAULT_DEVICE_ID, DEFAULT_JOB_ID); if (ret != SUCCESS) { GELOGE(ret, "failed to init session instance"); + GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); } init_flag_ = true; return SUCCESS; @@ -122,6 +127,7 @@ Status InnerSession::Finalize() { (void)VarManager::Instance(session_id_)->FreeVarMemory(); GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); + GE_CHK_STATUS_RET(RemoveDumpProperties(), "Remove dump properties failed"); return ret; } @@ -297,4 
+303,27 @@ Status InnerSession::SaveVariables(const Graph &graph, const std::vectorfloat16) scenarios. +* Mixed precision is enabled by default. +* \n +* *@par Attributes: -* @li strides: A list of 4 integers. Specifying the strides of the +*@li strides: Required. A list of 4 integers. Specifying the strides of the * convolution along the height and width. The dimension order is determined * by the data format of "x". By default the N and C dimensions are set to 1. -* @li pads: A list of 4 integers. Specifying the top, bottom, left and right -* padding. -* @li dilations: A list of 4 integers. Specifying the dilation rate to use -* for dilated convolution. Has the same dimension order and value as "strides". -* @li groups: Number of blocked connections from input channels to output -* channels. Input channels and output channels must both be divisible by -* "groups".Type is int32. -* @li offset_x: An optional integer for quantized convolution. Type is int32. Defaults to "0". -* @li data_format: An optional string from: "NHWC", "NCHW". Specifying the -* data format of the input and output images. Type is string. Defaults to "NHWC". Reserved . \n - -*@par Outputs: -* @li y: A 4D Tensor of output images . \n - -*@attention -* @li The parameter scope is listed as follows: -* @verbatim - |Name | Field | Scope - ------------------|--------------|---------- - |Input Image Size | H dimension | [1, 4096] - | | W dimension | [1, 4096] - ------------------|--------------|---------- - |Filter Size | H dimension | [1, 255] - | | W dimension | [1, 255] - ------------------|--------------|---------- - |Stride Size | H dimension | [1, 63] - | | W dimension | [1, 63] - ------------------|--------------|---------- - |Padding Size | top side | [0, 255] - | | bottom side | [0, 255] - | | left side | [0, 255] - | | right side | [0, 255] - ------------------|--------------|---------- - |Dilation Size | H dimension | [1, 255] - | W dimension | [1, 255] +*@li pads: Required. 
A list of 4 integers. Specifying the top, bottom, left +* and right padding. +* @li dilations: Optional. A list of 4 integers. Specifying the dilation rate +* to use for dilated convolution. Has the same dimension order and value as +* "strides". Defaults to [1, 1, 1, 1]. +* @li groups: Optional. An integer of type int32, for the number of blocked +* connections from input channels to output channels. Input channels and output +* channels must both be divisible by "groups". "x" in_channels must be equal to +* "filter" in_channels * groups. Defaults to 1. +* @li offset_x: Optional. An integer of type int32, for quantized convolution. +* Defaults to 0. +* @li data_format: Reserved and optional. A string from: "NHWC" and "NCHW". +* Specifying the data format of the input and output images. Defaults to +* "NHWC". +*\n +*\n +* The following value range restrictions must be met: +*@verbatim + |Name | Field | Scope + ------------------|----------|---------- + |Input Image Size | H | [1, 4096] + | | W | [1, 4096] + ------------------|----------|---------- + |Filter Size | H | [1, 255] + | | W | [1, 255] + ------------------|----------|---------- + |Stride | H | [1, 63] + | | W | [1, 63] + ------------------|----------|---------- + |Padding | top | [0, 255] + | | bottom | [0, 255] + | | left | [0, 255] + | | right | [0, 255] + ------------------|----------|---------- + |Dilation | H | [1, 255] + | | W | [1, 255] @endverbatim - -* @li There are restrictions for certain scenarios: -* @verbatim - Output | Restrictions - ------------------|---------------------------------------------- - W dimension == 1 | HxW(input) == HxW(filter) - H dimension == 1 | - ------------------|---------------------------------------------- - W dimension == 1 | Not supported - H dimension != 1 | +* +*@par Outputs: +*@li y: A 4D Tensor of output images. Has the same type and format as "x". With +* "NHWC" format, the shape is [batch, out_height, out_width, out_channels]. 
+*\n +* out_height = (in_height + top_pad + bottom_pad - +* dilation_h * (filter_height - 1) - 1) +* / stride_h + 1 +*\n +* out_width = (in_width + left_pad + right_pad - +* dilation_w * (filter_width - 1) - 1) +* / stride_w + 1 +* +*@attention Constraints: +*@li The following restrictions on the output must be met: +*@verbatim + | Output | Restrictions + -------------------|--------------------------- + | W dimension == 1 | H*W(input) == H*W(filter) + | H dimension == 1 | + -------------------|--------------------------- + | W dimension == 1 | Not supported + | H dimension != 1 | @endverbatim -* As shown above, "HxW(input)" indicates the image size after padding and -* "HxW(filter)" indicates the filter size after dilation . \n - +* "H * W (input)" indicates the image size after padding and "H * W (filter)" +* indicates the filter size after dilation. +*\n +* *@par Quantization supported or not -* Yes - +*@li Yes +* *@par Third-party framework compatibility *@li Compatible with the TensorFlow operator "conv2d". *@li Compatible with the Caffe operator 2D "Convolution". diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index d9c28087..415cc4ef 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -1035,6 +1035,9 @@ REG_OP(ROIPooling) *@par Outputs: * @ decoded_boxes: A Tensor. Must have the same type as box_predictions. * N-D with shape [N, 4]. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DecodeBbox) .INPUT(box_predictions, TensorType{DT_FLOAT16}) @@ -1052,6 +1055,9 @@ REG_OP(DecodeBbox) *@par Outputs: *boxes_output: A Tensor. Must have the same type as boxes_output. N-D with shape [N, 4]. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
*/ REG_OP(ClipBoxes) .INPUT(boxes_input, TensorType({DT_FLOAT16})) @@ -1270,6 +1276,9 @@ REG_OP(RpnProposalPostProcessing) * *@par Outputs: * @ boundary_encoded: A Tensor. Must be float16. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DecodeBoundariesTarget) .INPUT(boundary_predictions, TensorType({DT_FLOAT16})) @@ -1287,6 +1296,9 @@ REG_OP(DecodeBoundariesTarget) * *@par Outputs: * @ keypoints_decoded: A Tensor. Must be float16. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DecodeCornerpointsTargetBG) .INPUT(keypoints_prediction, TensorType({DT_FLOAT16})) @@ -1304,6 +1316,9 @@ REG_OP(DecodeCornerpointsTargetBG) * *@par Outputs: * @ keypoints_decoded: A Tensor. Must be float16. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DecodeCornerpointsTargetWrtCenterV1) .INPUT(keypoints_prediction, TensorType({DT_FLOAT16})) @@ -1321,6 +1336,9 @@ REG_OP(DecodeCornerpointsTargetWrtCenterV1) * *@par Outputs: * @ boundary_encoded: A Tensor. Must be float16. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DecodeWheelsTarget) .INPUT(boundary_predictions, TensorType({DT_FLOAT16})) diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 6d4f6f9d..14949c54 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -335,6 +335,8 @@ REG_OP(LogSoftmaxV2) *@par Outputs: * y: A Tensor of the same type as "grad" . \n +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ConfusionSoftmaxGrad) .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -499,6 +501,9 @@ REG_OP(LayerNorm) * @li pd_x: A Tensor. Must be one of the following types: float16, float32. * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. * @li pd_beta: A Tensor. 
Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LayerNormGrad) .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -540,6 +545,9 @@ REG_OP(LayerNormGrad) *@par Outputs: *Three outputs, including: * @li pd_x: A Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LayerNormXBackprop) .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -579,6 +587,9 @@ REG_OP(LayerNormXBackprop) *Three outputs, including: * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. * @li pd_beta: A Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LayerNormBetaGammaBackprop) .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -811,6 +822,9 @@ instruction . \n *@par Third-party framework compatibility *@li Compatible with the PyTorch operator GroupNorm. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(GroupNorm) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -862,6 +876,9 @@ Specifies the variance of "x" . \n *@par Third-party framework compatibility *@li Compatible with the PyTorch operator InstanceNorm. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(InstanceNormV2) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 4f51a82e..65fb462e 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -2031,6 +2031,9 @@ REG_OP(ApplyAdadeltaD) * Two outputs, including: * @li var: A mutable Tensor has the same type as "var". * @li accum: A mutable Tensor has the same type as "var". 
+ +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(FusedMulApplyMomentum) .INPUT(var, TensorType::NumberType()) @@ -2079,6 +2082,9 @@ REG_OP(FusedMulApplyMomentum) * @li var: A Tensor has the type float32. * @li var_copy: A Tensor has the type float16. * @li accum: A Tensor has the same type as input "accum". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(FusedMulApplyMomentumExtern) .INPUT(var, TensorType(DT_FLOAT)) @@ -2581,6 +2587,8 @@ REG_OP(SparseApplyAdadeltaD) *@par Attributes: * @li automic_add_mem_size: sizes of workspaces . \n +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(AtomicAddrClean) .ATTR(automic_add_mem_size, ListInt, {}) diff --git a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h index 90628af6..e94dafa7 100644 --- a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h +++ b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h @@ -30,6 +30,9 @@ namespace ge { *@par Outputs: *data: A Tensor of data value. Must be float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUAllocFloatStatusOperator) .OUTPUT(data, TensorType({DT_FLOAT})) @@ -43,6 +46,9 @@ REG_OP(NPUAllocFloatStatusOperator) *@par Outputs: *data: A Tensor of data value. Must be float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUClearFloatStatusOperator) .INPUT(addr, TensorType{DT_FLOAT}) @@ -57,6 +63,9 @@ REG_OP(NPUClearFloatStatusOperator) *@par Outputs: *data: A Tensor of data value. Must be float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUGetFloatStatusOperator) .INPUT(addr, TensorType{DT_FLOAT}) @@ -68,6 +77,9 @@ REG_OP(NPUGetFloatStatusOperator) *@par Outputs: *y: A Tensor of type int32, output eight numbers with a value of zero. 
+ +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUAllocFloatStatus) .OUTPUT(data, TensorType({DT_FLOAT})) @@ -81,6 +93,9 @@ REG_OP(NPUAllocFloatStatus) *@par Outputs: *data: A Tensor of type float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUClearFloatStatus) .INPUT(addr, TensorType{DT_FLOAT}) @@ -95,6 +110,9 @@ REG_OP(NPUClearFloatStatus) *@par Outputs: *data: A Tensor of type float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUGetFloatStatus) .INPUT(addr, TensorType{DT_FLOAT}) diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index 5938941a..4f42008e 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -186,6 +186,73 @@ REG_OP(PadD) .OP_END_FACTORY_REG(PadD) /** +*@brief Pads a tensor. + +*@par Inputs: +*Three inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, +* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, uint32, uint64. +* @li paddings: A Tensor of type int32 or int64. +* @li constant_values: An optional Tensor of int32 or int64 + +*@par Attributes: +* @li mode: An optional string, Defaults to "constant", indicates paddings mode, +* support "constant", "reflect", "edge" +* @li paddings_contiguous: An optional bool value, Defaults to true. +* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...] +* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...] + +*@par Outputs: +*y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility: +* Compatible with ONNX operator Pad.
+*/ +REG_OP(PadV3) + .INPUT(x, TensorType::BasicType()) + .INPUT(paddings, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(constant_values, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .ATTR(mode, String, "constant") + .ATTR(paddings_contiguous, Bool, true) + .OP_END_FACTORY_REG(PadV3) + +/** +*@brief Pads a tensor. + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32. + +*@par Attributes: +* @li paddings: A required "vector<vector<int>>". +* For each dimension D of input, paddings[D, 0] indicates how many +* values to add before the contents of tensor in that dimension, +* and paddings[D, 1] indicates how many values to add after the +* contents of tensor in that dimension. +* @li constant_values: An optional int value for pad. +* @li mode: An optional string, Defaults to "constant", indicates paddings mode, +* support "constant", "reflect", "edge" +* @li paddings_contiguous: An optional bool value, Defaults to true. +* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...] +* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...] + +*@par Outputs: +*y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility: +* Compatible with ONNX operator Pad. +*/ +REG_OP(PadV3D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) + .REQUIRED_ATTR(paddings, ListListInt) + .ATTR(constant_values, Int, 0) + .ATTR(mode, String, "constant") + .ATTR(paddings_contiguous, Bool, true) + .OP_END_FACTORY_REG(PadV3D) + +/** *@brief Create a diagonal tensor *@par Inputs: @@ -258,6 +325,9 @@ REG_OP(AscendPadding) /** *@brief EmbeddingRankId, traverse the index calculation server and its position in the server . \n +*@par Restrictions: +*Warning:THIS FUNCTION IS DEPRECATED. Please do not use. \n + *@par Inputs: *One input, include: *addr_table: Tensor which last dimension must be 3.
For example: [8, 3]. diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index edec232d..b97d824f 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -32,7 +32,7 @@ namespace ge { *@par Inputs: *Inputs include: -* @li logits: A Tensor. Must be one of the following types: float32, float64,double. +* @li logits: A Tensor. Must be one of the following types: float16, float, double. 2-D Tensor with shape [batch_size, num_classes]. * @li num_samples: A Tensor of type int32. 0-D. Number of independent samples to draw for each row slice . \n diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 7a239732..626dda59 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -502,7 +502,7 @@ REG_OP(ReduceMean) *@par Inputs: *One input: -* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8 . \n +* @li x: A Tensor. Must be one of the following types: float16, float32 . \n *@par Attributes: *@li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. @@ -521,8 +521,8 @@ REG_OP(ReduceMean) * Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMean instead. 
*/ REG_OP(ReduceMeanD) - .INPUT(x, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT, DT_INT8, DT_UINT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT, DT_INT8, DT_UINT8})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(axes, ListInt) .ATTR(keep_dims, Bool, false) .OP_END_FACTORY_REG(ReduceMeanD) diff --git a/third_party/fwkacllib/inc/ops/resource_variable_ops.h b/third_party/fwkacllib/inc/ops/resource_variable_ops.h index a4d54088..fdc76391 100644 --- a/third_party/fwkacllib/inc/ops/resource_variable_ops.h +++ b/third_party/fwkacllib/inc/ops/resource_variable_ops.h @@ -26,6 +26,21 @@ namespace ge { +/** +*@brief Creates a handle to a Variable resource. \n + +*@par Outputs: +*y:A Tensor of type resource. \n + +*@par Attributes: +* @li container: optional, string. +* @li shared_name: optional, string. +* @li dtype: required, type. +* @li shape: optional, ListInt. \n + +*@see VarHandleOp. +*/ + REG_OP(VarHandleOp) .ATTR(container, String, "") .ATTR(shared_name, String, "") @@ -34,6 +49,19 @@ REG_OP(VarHandleOp) .OUTPUT(y, TensorType({DT_RESOURCE})) .OP_END_FACTORY_REG(VarHandleOp) +/** +*@brief Assigns a new value to a variable. \n + +*@par Inputs: +*resource:Handle to the resource in which to store the variable. +*value:The value to set the new tensor to use. \n + +*@par Attributes: +* @li dtype: required, type. \n + +*@see AssignVariableOp. +*/ + REG_OP(AssignVariableOp) .INPUT(resource, TensorType({DT_RESOURCE})) .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ @@ -41,6 +69,19 @@ REG_OP(AssignVariableOp) .REQUIRED_ATTR(dtype, Type) .OP_END_FACTORY_REG(AssignVariableOp) +/** +*@brief Adds a value to the current value of a variable. \n + +*@par Inputs: +*resource:Handle to the resource in which to store the variable. +*value:The value by which the variable will be incremented. \n + +*@par Attributes: +* @li dtype: required, type. \n + +*@see AssignAddVariableOp. 
+*/ + REG_OP(AssignAddVariableOp) .INPUT(resource, TensorType({DT_RESOURCE})) .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ @@ -48,6 +89,19 @@ REG_OP(AssignAddVariableOp) .REQUIRED_ATTR(dtype, Type) .OP_END_FACTORY_REG(AssignAddVariableOp) +/** +*@brief Subtracts a value from the current value of a variable. \n + +*@par Inputs: +*resource:Handle to the resource in which to store the variable. +*value:The value by which the variable will be decremented. \n + +*@par Attributes: +* @li dtype: required, type. \n + +*@see AssignSubVariableOp. +*/ + REG_OP(AssignSubVariableOp) .INPUT(resource, TensorType({DT_RESOURCE})) .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index 77437aba..e1a83f43 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -81,6 +81,9 @@ REG_OP(BasicLSTMCell) *@par Outputs: *output_h:A Tensor of output. Must be the type float32. The format must be FRACTAL_Z. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/ REG_OP(BasicLSTMCellWeightGrad) .INPUT(x, TensorType({DT_FLOAT16})) @@ -358,6 +367,9 @@ REG_OP(BasicLSTMCellWeightGrad) *two outputs: *@li dgate:A 4D Tensor. Must be one of the following types: float16. *@li dct_1:A 4D Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(BasicLSTMCellCStateGrad) .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -439,6 +451,9 @@ REG_OP(RNN) *two outputs: *@li o_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(BasicRNNCell) .INPUT(x, TensorType({DT_FLOAT16})) @@ -460,13 +475,13 @@ REG_OP(BasicRNNCell) *@brief: DynamicGRU calculation. *@par Inputs: *seven inputs: \n -*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. -*@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. -*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. -*@li cw:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. -*@li cb:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. -*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. -*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li w:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li b:Must be one of the following types: float16, float32. The format must be ND. +*@li cw:Must be one of the following types: float16. 
The format must be FRACTAL_Z. +*@li cb:Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:Must be one of the following types: int32. The format must be ND. +*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Attributes: *@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. @@ -480,11 +495,11 @@ REG_OP(BasicRNNCell) *@par Outputs: *five outputs: \n -*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li r:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li n:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li i:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li n:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
@@ -495,7 +510,7 @@ REG_OP(DynamicGRU) .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(cw, TensorType({DT_FLOAT16})) .INPUT(cb, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(seq_length, TensorType({DT_UINT32})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -516,13 +531,13 @@ REG_OP(DynamicGRU) *@brief: DynamicGRUV2 calculation. *@par Inputs: *seven inputs: \n -*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. -*@li weight_input:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. -*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. -*@li bias_input:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. -*@li bias_hidden:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. -*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. -*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li bias_input:Must be one of the following types: float16, float32. The format must be ND. +*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:Must be one of the following types: int32. The format must be ND. +*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. 
*@par Attributes: *@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. @@ -538,12 +553,12 @@ REG_OP(DynamicGRU) *@par Outputs: *six outputs: \n -*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
@@ -554,7 +569,7 @@ REG_OP(DynamicGRUV2) .INPUT(weight_hidden, TensorType({DT_FLOAT16})) .OPTIONAL_INPUT(bias_input, TensorType({DT_FLOAT16, DT_FLOAT})) .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(seq_length, TensorType({DT_UINT32})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index d17e8e94..613ce358 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -1787,6 +1787,9 @@ REG_OP(TileWithAxis) *@par Outputs: *y: A Tensor of the same type as "x". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ReadSelect) .INPUT(x, TensorType::ALL()) @@ -1802,6 +1805,9 @@ REG_OP(ReadSelect) *@par Outputs: *y: A Tensor. Has the same type as "x". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(WriteSelect) .INPUT(x, TensorType::ALL()) diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 5414f122..edc55820 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -625,6 +625,9 @@ REG_OP(ConfusionTransposeD) *@par Outputs: *y: A Tensor. Has the same type as "x". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
*/ REG_OP(ConfusionTranspose) .INPUT(x, TensorType::BasicType()) diff --git a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h index 2f014937..c96b96be 100644 --- a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h +++ b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h @@ -28,6 +28,9 @@ namespace ge { /** *@brief Applies a perspective transformation to an image . \n +*@par Restrictions: +*Warning:THIS FUNCTION IS DEPRECATED. Please do not use. \n + *@par Inputs: *@li x: input tensor, format NCHW, type must be float. *@li matrix: transformation matrix, format ND , shape must be (N, 9), type must be float . \n diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index 8e159dd7..8c1a4326 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -159,7 +159,12 @@ typedef struct rtAiCoreMemorySize { * @ingroup dvrt_mem * @brief memory type */ -typedef enum tagRtMemoryType { RT_MEMORY_TYPE_HOST = 1, RT_MEMORY_TYPE_DEVICE = 2 } rtMemoryType_t; +typedef enum tagRtMemoryType { + RT_MEMORY_TYPE_HOST = 1, + RT_MEMORY_TYPE_DEVICE = 2 , + RT_MEMORY_TYPE_SVM = 3, + RT_MEMORY_TYPE_DVPP = 4 +} rtMemoryType_t; /** * @ingroup dvrt_mem @@ -167,8 +172,8 @@ typedef enum tagRtMemoryType { RT_MEMORY_TYPE_HOST = 1, RT_MEMORY_TYPE_DEVICE = */ typedef struct tagRtPointerAttributes { rtMemoryType_t memoryType; // host memory or device memory + rtMemoryType_t locationType; uint32_t deviceID; // device ID - uint32_t isManaged; uint32_t pageSize; } rtPointerAttributes_t; diff --git a/third_party/fwkacllib/inc/tdt/status.h b/third_party/fwkacllib/inc/tdt/status.h index 87ae8f75..185d2b9c 100644 --- a/third_party/fwkacllib/inc/tdt/status.h +++ b/third_party/fwkacllib/inc/tdt/status.h @@ -100,6 +100,8 @@ enum { TDT_TSD_SEND_HEARTBEAT_FAILED_CODE, TDT_TSD_CLEAN_RESOURCE_FAILED_CODE, TDT_TSD_SEND_MSG_FAILED_CODE, + 
TDT_TSD_AICPU_SD_PROCESS_ABNORMAL_CODE, + TDT_TSD_CUSTOM_PROCESS_ABNORMAL_CODE, TDT_PPC_DRIVER_INIT_FAIL_CODE, TDT_PPC_SERVER_CLIENT_CREATE_FAIL_CODE, TDT_PPC_SERVER_CLIENT_DESTORY_FAIL_CODE, @@ -510,6 +512,8 @@ TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_INIT_HDCSERVER_FAILED, " TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_SEND_HEARTBEAT_FAILED, "Tsdaemon get pid fail"); TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEAN_RESOURCE_FAILED, "Tsdaemon clean resource fail"); TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_SEND_MSG_FAILED, "Tsdaemon send msg fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_AICPU_SD_PROCESS_ABNORMAL, "aicpu_sd process abnormal"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_INFO, TDT_TSD_CUSTOM_PROCESS_ABNORMAL, "custom_aicpu_sd process abnormal"); /********************* PPC ****************************/ // create PPC error level error diff --git a/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h new file mode 100644 index 00000000..a1c39a51 --- /dev/null +++ b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h @@ -0,0 +1,36 @@ +/** +* @file adx_datadump_server.h +* +* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +#ifndef ADX_DATADUMP_SERVER_H +#define ADX_DATADUMP_SERVER_H +#ifdef __cplusplus +extern "C" { +#endif +/** + * @brief initialize server for normal datadump function. + * @return + * IDE_DAEMON_OK: datadump server init success + * IDE_DAEMON_ERROR: datadump server init failed + */ +int AdxDataDumpServerInit(); + +/** + * @brief uninitialize server for normal datadump function. 
+ * @return + * IDE_DAEMON_OK: datadump server uninit success + * IDE_DAEMON_ERROR: datadump server uninit failed + */ +int AdxDataDumpServerUnInit(); + +#ifdef __cplusplus +} +#endif +#endif + From dcc1768c68ea7b107d2c9a20eb57d431c5b073ca Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Wed, 30 Sep 2020 15:44:56 +0800 Subject: [PATCH 2/7] fix securec download links due to mistakes made by openeuler community --- cmake/external_libs/securec.cmake | 4 ++-- third_party/patch/securec/securec.patch001 | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/external_libs/securec.cmake b/cmake/external_libs/securec.cmake index 83a4409d..2fbf8b80 100644 --- a/cmake/external_libs/securec.cmake +++ b/cmake/external_libs/securec.cmake @@ -1,7 +1,7 @@ graphengine_add_pkg(securec VER 1.1.10 - URL https://gitee.com/openeuler/bounds_checking_function/repository/archive/v1.1.10.tar.gz - MD5 0782dd2351fde6920d31a599b23d8c91 + URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz + MD5 193f0ca5246c1dd84920db34d2d8249f LIBS c_sec PATCHES ${GE_SOURCE_DIR}/third_party/patch/securec/securec.patch001 CMAKE_OPTION "-DCMAKE_BUILD_TYPE=Release" diff --git a/third_party/patch/securec/securec.patch001 b/third_party/patch/securec/securec.patch001 index 666f28ce..01c2d769 100644 --- a/third_party/patch/securec/securec.patch001 +++ b/third_party/patch/securec/securec.patch001 @@ -1,5 +1,5 @@ -diff -Npur -x .git bounds_checking_function/CMakeLists.txt securec/CMakeLists.txt ---- bounds_checking_function/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 +diff -Npur -x .git libboundscheck/CMakeLists.txt securec/CMakeLists.txt +--- libboundscheck/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 +++ securec/CMakeLists.txt 2020-09-19 16:53:48.689460700 +0800 @@ -0,0 +1,18 @@ +cmake_minimum_required(VERSION 3.14) From fe038d0ae551931b861f7ad27ff579ef378889b0 Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Wed, 30 Sep 2020 16:30:32 +0800 Subject: [PATCH 
3/7] add libadump_server support in ge_lib_path mode --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 266ea024..0ae9c88f 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,6 +73,7 @@ elseif(DEFINED ENV{D_LINK_PATH}) find_library(hccl libhccl.so ${GE_LIB_PATH}) find_library(resource libresource.so ${GE_LIB_PATH}) find_library(error_manager liberror_manager.so ${GE_LIB_PATH}) + find_library(adump_server libadump_server.a ${GE_LIB_PATH}) else() # Ascend mode if(DEFINED ENV{ASCEND_CUSTOM_PATH}) From 250465a71f8520b792cceaabc816318b7fcfa552 Mon Sep 17 00:00:00 2001 From: wuweikang Date: Fri, 9 Oct 2020 12:33:07 +0800 Subject: [PATCH 4/7] sync-from-trunk-to-blue-zone-1009 --- CMakeLists.txt | 5 +- cmake/external_libs/securec.cmake | 4 +- inc/framework/common/string_util.h | 2 + inc/framework/omg/omg_inner_types.h | 3 + inc/graph/debug/ge_attr_define.h | 4 + inc/graph/op_desc.h | 3 +- inc/graph/range_vistor.h | 4 + inc/graph/utils/op_desc_utils.h | 1 + src/common/graph/detail/attributes_holder.cc | 2 +- src/common/graph/ge_attr_define.cc | 4 + src/common/graph/node.cc | 7 +- src/common/graph/op_desc.cc | 42 ++- src/common/graph/ref_relation.cc | 2 +- src/common/graph/utils/op_desc_utils.cc | 47 +++ src/ge/CMakeLists.txt | 6 +- src/ge/client/CMakeLists.txt | 2 + src/ge/client/ge_api.cc | 4 + src/ge/client/module.mk | 6 +- src/ge/common/dump/dump_op.cc | 20 +- src/ge/common/ge/datatype_util.cc | 2 +- src/ge/common/profiling/profiling_manager.cc | 32 +- src/ge/common/profiling/profiling_manager.h | 2 +- src/ge/common/util.cc | 2 +- src/ge/executor/CMakeLists.txt | 1 + src/ge/executor/module.mk | 5 + src/ge/ge_inference.mk | 4 +- src/ge/ge_local_engine/CMakeLists.txt | 2 +- src/ge/ge_local_engine/module.mk | 2 +- src/ge/ge_runner.mk | 1 + src/ge/generator/ge_generator.cc | 11 +- src/ge/graph/build/memory/graph_mem_assigner.cc | 341 +++++++++------------ src/ge/graph/build/memory/graph_mem_assigner.h | 11 +- 
src/ge/graph/load/new_model_manager/data_dumper.cc | 17 +- src/ge/graph/load/new_model_manager/data_dumper.h | 1 + .../graph/load/new_model_manager/davinci_model.cc | 15 +- .../graph/load/new_model_manager/model_manager.cc | 2 +- src/ge/graph/load/new_model_manager/model_utils.cc | 13 +- src/ge/graph/passes/attach_stream_label_pass.cc | 21 +- src/ge/graph/passes/attach_stream_label_pass.h | 6 +- src/ge/graph/passes/enter_pass.cc | 60 +++- src/ge/graph/passes/enter_pass.h | 3 + src/ge/graph/preprocess/multi_batch_copy_graph.cc | 63 +--- src/ge/graph/preprocess/multi_batch_copy_graph.h | 2 - src/ge/host_cpu_engine/module.mk | 2 +- src/ge/init/gelib.cc | 10 +- src/ge/init/gelib.h | 2 +- src/ge/ir_build/atc_ir_common.cc | 2 +- src/ge/ir_build/ge_ir_build.cc | 6 + src/ge/opskernel_manager/ops_kernel_manager.cc | 14 +- src/ge/session/omg.cc | 16 +- src/ge/single_op/single_op.cc | 2 +- src/ge/single_op/task/build_task_utils.cc | 5 +- src/ge/single_op/task/op_task.cc | 163 +++++----- src/ge/single_op/task/op_task.h | 27 +- third_party/fwkacllib/inc/ops/aipp.h | 15 +- .../fwkacllib/inc/ops/elewise_calculation_ops.h | 155 +++++++--- third_party/fwkacllib/inc/ops/functional_ops.h | 20 +- third_party/fwkacllib/inc/ops/image_ops.h | 7 +- third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h | 160 ++++++---- third_party/fwkacllib/inc/ops/nn_calculation_ops.h | 73 ++++- third_party/fwkacllib/inc/ops/nn_detect_ops.h | 25 +- third_party/fwkacllib/inc/ops/nn_norm_ops.h | 36 +-- third_party/fwkacllib/inc/ops/nn_pooling_ops.h | 3 - third_party/fwkacllib/inc/ops/nn_training_ops.h | 39 --- third_party/fwkacllib/inc/ops/pad_ops.h | 12 - .../fwkacllib/inc/ops/ragged_conversion_ops.h | 4 +- third_party/fwkacllib/inc/ops/random_ops.h | 3 - third_party/fwkacllib/inc/ops/reduce_ops.h | 24 -- third_party/fwkacllib/inc/ops/rnn.h | 6 +- third_party/fwkacllib/inc/ops/save_ops.h | 2 +- third_party/fwkacllib/inc/ops/sdca_ops.h | 13 +- third_party/fwkacllib/inc/ops/selection_ops.h | 55 +--- 
.../fwkacllib/inc/ops/split_combination_ops.h | 15 +- third_party/fwkacllib/inc/ops/transformation_ops.h | 21 -- third_party/fwkacllib/inc/runtime/base.h | 5 +- third_party/fwkacllib/inc/runtime/config.h | 4 +- third_party/fwkacllib/inc/runtime/context.h | 4 +- third_party/fwkacllib/inc/runtime/dev.h | 4 +- third_party/fwkacllib/inc/runtime/dvfsprofile.h | 4 +- third_party/fwkacllib/inc/runtime/event.h | 4 +- third_party/fwkacllib/inc/runtime/kernel.h | 4 +- third_party/fwkacllib/inc/runtime/mem.h | 4 +- third_party/fwkacllib/inc/runtime/rt_model.h | 4 +- third_party/fwkacllib/inc/runtime/stream.h | 4 +- .../fwkacllib/inc/toolchain/adx_datadump_server.h | 22 +- third_party/patch/securec/securec.patch001 | 4 +- 86 files changed, 933 insertions(+), 858 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 266ea024..457fa086 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,11 +68,12 @@ elseif(DEFINED ENV{D_LINK_PATH}) find_library(slog libslog.so ${GE_LIB_PATH}) find_library(mmpa libmmpa.so ${GE_LIB_PATH}) find_library(runtime libruntime.so ${GE_LIB_PATH}) - find_library(msprof libmsprof.so ${GE_LIB_PATH}) + find_library(msprof libmsprofiler.a ${GE_LIB_PATH}) find_library(register libregister.so ${GE_LIB_PATH}) find_library(hccl libhccl.so ${GE_LIB_PATH}) find_library(resource libresource.so ${GE_LIB_PATH}) find_library(error_manager liberror_manager.so ${GE_LIB_PATH}) + find_library(adump_server libadump_server.a ${GE_LIB_PATH}) else() # Ascend mode if(DEFINED ENV{ASCEND_CUSTOM_PATH}) @@ -84,7 +85,7 @@ else() set(ASCEND_RUNTIME_DIR ${ASCEND_DIR}/fwkacllib/lib64) find_library(slog libslog.so ${ASCEND_DRIVER_DIR}) find_library(mmpa libmmpa.so ${ASCEND_DRIVER_DIR}) - find_library(msprof libmsprof.so ${ASCEND_DRIVER_DIR}) + find_library(msprof libmsprofiler.a ${ASCEND_RUNTIME_DIR}) find_library(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) find_library(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) diff --git a/cmake/external_libs/securec.cmake 
b/cmake/external_libs/securec.cmake index 83a4409d..2fbf8b80 100644 --- a/cmake/external_libs/securec.cmake +++ b/cmake/external_libs/securec.cmake @@ -1,7 +1,7 @@ graphengine_add_pkg(securec VER 1.1.10 - URL https://gitee.com/openeuler/bounds_checking_function/repository/archive/v1.1.10.tar.gz - MD5 0782dd2351fde6920d31a599b23d8c91 + URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz + MD5 193f0ca5246c1dd84920db34d2d8249f LIBS c_sec PATCHES ${GE_SOURCE_DIR}/third_party/patch/securec/securec.patch001 CMAKE_OPTION "-DCMAKE_BUILD_TYPE=Release" diff --git a/inc/framework/common/string_util.h b/inc/framework/common/string_util.h index 918a3950..3e4bf093 100644 --- a/inc/framework/common/string_util.h +++ b/inc/framework/common/string_util.h @@ -61,8 +61,10 @@ class StringUtils { /// @param [in] delim separator /// @return string array after segmentation /// + /*lint -e1077*/ static std::vector Split(const std::string &str, char delim) { std::vector elems; + /*lint +e1077*/ if (str.empty()) { elems.emplace_back(""); diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index 2f91d7aa..e1a7da0b 100644 --- a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -92,6 +92,9 @@ struct OmgContext { std::map> out_nodes_map; // user-designate out nodes (this is used for determing the orders) std::vector> user_out_nodes; + // save the output node of the network, value = topName, + // topName indicates the output name of the operator. 
+ std::vector user_out_nodes_top_vec; // net out nodes (where user_out_nodes or leaf nodes) std::vector net_out_nodes; // net out nodes top names(only caffe has top) diff --git a/inc/graph/debug/ge_attr_define.h b/inc/graph/debug/ge_attr_define.h index 7538ba6a..47b11ba8 100644 --- a/inc/graph/debug/ge_attr_define.h +++ b/inc/graph/debug/ge_attr_define.h @@ -1052,6 +1052,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_FLAG; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_MODE; +// op dynamic input +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_INPUT_START; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_INPUT_END; + // functional ops attr GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_THEN_BRANCH; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_ELSE_BRANCH; diff --git a/inc/graph/op_desc.h b/inc/graph/op_desc.h index c7da30b7..4d724c42 100644 --- a/inc/graph/op_desc.h +++ b/inc/graph/op_desc.h @@ -235,7 +235,8 @@ class OpDesc : public std::enable_shared_from_this, public AttrHolder { vector GetOpInferDepends() const; string GetInputNameByIndex(uint32_t index) const; - + string GetValidInputNameByIndex(uint32_t index) const; + int GetValidInputIndexByName(const string &name) const; int GetInputIndexByName(const string &name) const; string GetOutputNameByIndex(uint32_t index) const; diff --git a/inc/graph/range_vistor.h b/inc/graph/range_vistor.h index 20905bd9..8635d413 100644 --- a/inc/graph/range_vistor.h +++ b/inc/graph/range_vistor.h @@ -22,8 +22,10 @@ template class RangeVistor { public: + /*lint -e151*/ using Iterator = typename std::vector::iterator; using ConstIterator = typename std::vector::const_iterator; + /*lint +e151*/ RangeVistor(O 
owner, const std::vector &vs) : owner_(owner), elements_(vs) {} @@ -41,7 +43,9 @@ class RangeVistor { bool empty() const { return elements_.empty(); } + /*lint -e659*/ E &at(std::size_t index) { return elements_.at(index); } + /*lint +e659*/ const E &at(std::size_t index) const { return elements_.at(index); } diff --git a/inc/graph/utils/op_desc_utils.h b/inc/graph/utils/op_desc_utils.h index 6a9a4695..daa95ebe 100644 --- a/inc/graph/utils/op_desc_utils.h +++ b/inc/graph/utils/op_desc_utils.h @@ -53,6 +53,7 @@ class OpDescUtils { static vector MutableWeights(const ge::NodePtr node); static graphStatus SetWeights(ge::Node& node, const vector& weights); static graphStatus SetWeights(ge::NodePtr node, const vector& weights); + static graphStatus SetWeights(ge::Node& node, const map& weights_map); static graphStatus ClearWeights(ge::NodePtr node); static bool ClearInputDesc(ge::OpDescPtr op_desc, uint32_t index); diff --git a/src/common/graph/detail/attributes_holder.cc b/src/common/graph/detail/attributes_holder.cc index 113f4b6f..7e3b6de9 100644 --- a/src/common/graph/detail/attributes_holder.cc +++ b/src/common/graph/detail/attributes_holder.cc @@ -28,7 +28,7 @@ using std::unordered_set; void AttrHolder::CopyAttrsFrom(const AttrHolder &holder) { MutableAttrMap().CopyValueFrom(holder.GetAttrMap()); } graphStatus AttrHolder::SetAttr(const std::string &name, const GeAttrValue &value) { if (value.IsEmpty()) { - GELOGE(GRAPH_FAILED, "value is empty, key %s", name.c_str()); + GELOGE(GRAPH_FAILED, "value is empty, key of the attr is %s", name.c_str()); return GRAPH_FAILED; } auto proto_map = MutableAttrMap().GetProtoMsg(); diff --git a/src/common/graph/ge_attr_define.cc b/src/common/graph/ge_attr_define.cc index cd504812..9b723bb3 100644 --- a/src/common/graph/ge_attr_define.cc +++ b/src/common/graph/ge_attr_define.cc @@ -1060,6 +1060,10 @@ const std::string ATTR_NAME_HCCL_FUSED_FLAG = "_hccl_fused_node"; const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR = 
"_alloc_fixed_addr"; const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX = "_alloc_fixed_addr_index"; +// op dynamic input +const std::string ATTR_NAME_DYNAMIC_INPUT_START = "_dynamic_input_index_start"; +const std::string ATTR_NAME_DYNAMIC_INPUT_END = "_dynamic_input_index_end"; + // atc user def dtype&format const std::string ATTR_ATC_USER_DEFINE_DATATYPE = "_user_defined_data_type"; const std::string ATTR_ATC_USER_DEFINE_FORMAT = "_user_defined_format"; diff --git a/src/common/graph/node.cc b/src/common/graph/node.cc index 10d6b3ed..d33c6008 100644 --- a/src/common/graph/node.cc +++ b/src/common/graph/node.cc @@ -762,9 +762,10 @@ graphStatus Node::Verify() const { if (!is_unknown_graph) { for (const auto &in_anchor_ptr : GetAllInDataAnchors()) { GE_IF_BOOL_EXEC(in_anchor_ptr == nullptr, GELOGW("in anchor ptr is null"); continue); - bool valid_anchor = op_->GetType() == data_type || op_->GetType() == aipp_data_type || - op_->GetType() == const_type || op_->GetType() == variable_type || - op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || in_anchor_ptr->GetPeerAnchors().size() > 0; + bool valid_anchor = + op_->GetType() == data_type || op_->GetType() == aipp_data_type || op_->GetType() == const_type || + op_->GetType() == variable_type || op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || + op_->MutableInputDesc(in_anchor_ptr->GetIdx()) == nullptr || in_anchor_ptr->GetPeerAnchors().size() > 0; if (!valid_anchor) { ErrorManager::GetInstance().ATCReportErrMessage("E11019", {"opname", "index"}, {GetName(), std::to_string(in_anchor_ptr->GetIdx())}); diff --git a/src/common/graph/op_desc.cc b/src/common/graph/op_desc.cc index fdd1acb7..dee0aece 100644 --- a/src/common/graph/op_desc.cc +++ b/src/common/graph/op_desc.cc @@ -347,7 +347,10 @@ graphStatus OpDesc::AddOptionalInputDesc(const string &name, const ge::GeTensorD GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus OpDesc::UpdateInputDesc(uint32_t index, const ge::GeTensorDesc &tensor_Desc) { - 
GE_CHK_BOOL_RET_STATUS((index < inputs_desc_.size()), GRAPH_FAILED, "The index is invalid. index[%u]", index); + if (index >= inputs_desc_.size()) { + GELOGW("The index is invalid. index[%u]", index); + return GRAPH_FAILED; + } inputs_desc_[index] = ComGraphMakeShared(tensor_Desc); if (inputs_desc_[index] == nullptr) { @@ -949,6 +952,43 @@ int OpDesc::GetInputIndexByName(const string &name) const { return static_cast(it_find->second); } +int OpDesc::GetValidInputIndexByName(const string &name) const { + map valid_input_name_idx{}; + uint32_t j = 0; + for (size_t i = 0; i < GetAllInputsSize(); i++) { + if (MutableInputDesc(static_cast(i)) != nullptr) { + auto valid_name = GetInputNameByIndex(static_cast(i)); + GE_CHK_BOOL_RET_STATUS_NOLOG(!valid_name.empty(), -1); + valid_input_name_idx.insert({valid_name, j}); + j++; + } + } + auto it_find = valid_input_name_idx.find(name); + GE_CHK_BOOL_RET_STATUS_NOLOG(it_find != valid_input_name_idx.end(), -1); + return static_cast(it_find->second); +} + +string OpDesc::GetValidInputNameByIndex(uint32_t index) const { + map valid_input_name_idx{}; + uint32_t j = 0; + for (size_t i = 0; i < GetAllInputsSize(); i++) { + if (MutableInputDesc(static_cast(i)) != nullptr) { + auto valid_name = GetInputNameByIndex(static_cast(i)); + GE_CHK_BOOL_RET_STATUS_NOLOG(!valid_name.empty(), ""); + valid_input_name_idx.insert({valid_name, j}); + j++; + } + } + auto it = valid_input_name_idx.begin(); + for (; it != valid_input_name_idx.end(); ++it) { + if (it->second == index) { + break; + } + } + GE_CHK_BOOL_RET_STATUS_NOLOG(it != valid_input_name_idx.end(), ""); + return it->first; +} + GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY string OpDesc::GetOutputNameByIndex(uint32_t index) const { auto it = output_name_idx_.begin(); for (; it != output_name_idx_.end(); ++it) { diff --git a/src/common/graph/ref_relation.cc b/src/common/graph/ref_relation.cc index 9a9f66ba..48e136fb 100644 --- a/src/common/graph/ref_relation.cc +++ 
b/src/common/graph/ref_relation.cc @@ -56,7 +56,7 @@ class RefRelations::Impl { } return GRAPH_SUCCESS; } - GELOGW("can not find any relations! key value is %s", lookup_key.c_str()); + GELOGW("can not find any relations! key value of dest relation is %s", lookup_key.c_str()); return GRAPH_SUCCESS; }; graphStatus BuildRefRelations(ge::ComputeGraph &root_graph); diff --git a/src/common/graph/utils/op_desc_utils.cc b/src/common/graph/utils/op_desc_utils.cc index 63fff177..17c80b2c 100644 --- a/src/common/graph/utils/op_desc_utils.cc +++ b/src/common/graph/utils/op_desc_utils.cc @@ -560,6 +560,53 @@ OpDescUtils::SetWeights(ge::Node &node, const vector &weights) return GRAPH_SUCCESS; } +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus +OpDescUtils::SetWeights(ge::Node &node, const map &weights_map) { + GE_CHECK_NOTNULL(node.GetOpDesc()); + // 1. node is const + if (node.GetOpDesc()->GetType() == CONSTANT) { + if (weights_map.size() == CONST_OP_NORMAL_WEIGHT_SIZE) { + return SetWeights(node.GetOpDesc(), weights_map.begin()->second); + } + GELOGE(GRAPH_PARAM_INVALID, "const op %s weight size %zu should be 1", node.GetName().c_str(), weights_map.size()); + return GRAPH_PARAM_INVALID; + } + // 2. node is not const + for (const auto &pair : weights_map) { + auto in_data_anchor = node.GetInDataAnchor(pair.first); + if (in_data_anchor != nullptr && in_data_anchor->GetPeerOutAnchor() != nullptr) { + // a. 
update const input node + auto out_anchor = in_data_anchor->GetPeerOutAnchor(); + auto peer_node = out_anchor->GetOwnerNode(); + if (peer_node == nullptr) { + GELOGE(GRAPH_PARAM_INVALID, "op %s [%d]'s input node is null", node.GetName().c_str(), pair.first); + return GRAPH_PARAM_INVALID; + } + if (peer_node->GetType() != CONSTANT) { + GELOGE(GRAPH_PARAM_INVALID, " op %s [%d]'s input node should be const, but is %s type:%s ", + node.GetName().c_str(), pair.first, peer_node->GetName().c_str(), peer_node->GetType().c_str()); + } + SetWeights(peer_node->GetOpDesc(), pair.second); + } else { + // b. create new const input node + auto const_opdesc = CreateConstOp(pair.second); + GE_CHECK_NOTNULL(const_opdesc); + auto owner_graph = node.GetOwnerComputeGraph(); + if (owner_graph == nullptr) { + GELOGE(GRAPH_PARAM_INVALID, "node's graph is empty, name: %s", node.GetName().c_str()); + return GRAPH_PARAM_INVALID; + } + auto const_node = owner_graph->AddNodeFront(const_opdesc); + if (node.AddLinkFrom(static_cast(pair.first), const_node) != GRAPH_SUCCESS) { + GELOGE(GRAPH_FAILED, "op %s add const to input index[%d] failed", node.GetName().c_str(), pair.first); + return GRAPH_FAILED; + } + } + } + NodeUtils::UpdateIsInputConst(node); + return GRAPH_SUCCESS; +} + OpDescPtr OpDescUtils::CreateConstOp(const GeTensorPtr &tensor_ptr) { GE_CHK_BOOL_EXEC(tensor_ptr != nullptr, return nullptr, "tensor_ptr is nullptr!"); shared_ptr const_opdesc = ComGraphMakeShared(); diff --git a/src/ge/CMakeLists.txt b/src/ge/CMakeLists.txt index db00d8a1..3f4f1a8b 100755 --- a/src/ge/CMakeLists.txt +++ b/src/ge/CMakeLists.txt @@ -229,6 +229,7 @@ target_link_libraries(ge_runner ${resouce} ${ascend_hal} ${adump_server} + ${msprofiler} rt dl) @@ -358,7 +359,10 @@ add_library(ge_compiler SHARED ${INFER_SRC_LIST} ${PROTO_SRCS} ${PROTO_HEADER_HD target_compile_definitions(ge_compiler PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 REUSE_MEMORY=1 - FMK_HOST_INFER) + FMK_HOST_INFER + FMK_SUPPORT_DUMP + 
COMPILE_OMG_PACKAGE + REUSE_MEMORY=1) target_link_libraries(ge_compiler graph ge_common diff --git a/src/ge/client/CMakeLists.txt b/src/ge/client/CMakeLists.txt index a87beb77..b568e3f6 100755 --- a/src/ge/client/CMakeLists.txt +++ b/src/ge/client/CMakeLists.txt @@ -68,5 +68,7 @@ target_link_libraries(ge_client ${mmpa} ${runtime} ${msprof} + ${msprofiler} + ${ascend_hal} rt dl) diff --git a/src/ge/client/ge_api.cc b/src/ge/client/ge_api.cc index ad01e48f..7c4cf9c8 100644 --- a/src/ge/client/ge_api.cc +++ b/src/ge/client/ge_api.cc @@ -16,6 +16,7 @@ #include "ge/ge_api.h" #include +#include #include "common/debug/log.h" #include "framework/common/debug/ge_log.h" #include "common/ge/datatype_util.h" @@ -163,6 +164,9 @@ Status GEFinalize() { g_ge_initialized = false; } + // to avoid memory fragment, use malloc_trim to back free stack to system + malloc_trim(0); + GELOGT(TRACE_STOP, "GEFinalize finished"); return ret; } diff --git a/src/ge/client/module.mk b/src/ge/client/module.mk index 1a304cbf..476841c9 100644 --- a/src/ge/client/module.mk +++ b/src/ge/client/module.mk @@ -70,9 +70,10 @@ LOCAL_SHARED_LIBRARIES := \ libregister \ libge_compiler \ libge_common \ - libmsprof - + libmsprof \ + stub/libascend_hal +LOCAL_STATIC_LIBRARIES := libmsprofiler LOCAL_LDFLAGS := -lrt -ldl @@ -107,6 +108,7 @@ LOCAL_SHARED_LIBRARIES := \ libge_common \ libmsprof +LOCAL_STATIC_LIBRARIES := libmsprofiler LOCAL_LDFLAGS := -lrt -ldl LOCAL_CFLAGS += \ diff --git a/src/ge/common/dump/dump_op.cc b/src/ge/common/dump/dump_op.cc index 31a88023..8c4ff330 100644 --- a/src/ge/common/dump/dump_op.cc +++ b/src/ge/common/dump/dump_op.cc @@ -172,18 +172,18 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) { return RT_FAILED; } - constexpr int32_t ioAddrNum = 2; - constexpr uint32_t argsSize = sizeof(aicpu::AicpuParamHead) + ioAddrNum * sizeof(uint64_t); - char args[argsSize] = {0}; - auto paramHead = reinterpret_cast(args); - paramHead->length = argsSize; - 
paramHead->ioAddrNum = ioAddrNum; - auto ioAddr = reinterpret_cast(args + sizeof(aicpu::AicpuParamHead)); - ioAddr[0] = reinterpret_cast(proto_dev_mem_); - ioAddr[1] = reinterpret_cast(proto_size_dev_mem_); + constexpr int32_t io_addr_num = 2; + constexpr uint32_t args_size = sizeof(aicpu::AicpuParamHead) + io_addr_num * sizeof(uint64_t); + char args[args_size] = {0}; + auto param_head = reinterpret_cast(args); + param_head->length = args_size; + param_head->ioAddrNum = io_addr_num; + auto io_addr = reinterpret_cast(args + sizeof(aicpu::AicpuParamHead)); + io_addr[0] = reinterpret_cast(proto_dev_mem_); + io_addr[1] = reinterpret_cast(proto_size_dev_mem_); rt_ret = rtCpuKernelLaunch(nullptr, kDumpKernelsDumpOp, 1, // blockDim default 1 - args, argsSize, + args, args_size, nullptr, // no need smDesc stream_); if (rt_ret != RT_ERROR_NONE) { diff --git a/src/ge/common/ge/datatype_util.cc b/src/ge/common/ge/datatype_util.cc index f2ff12cb..79a473fe 100644 --- a/src/ge/common/ge/datatype_util.cc +++ b/src/ge/common/ge/datatype_util.cc @@ -34,7 +34,7 @@ std::map> g_reverse_translatable_data_ty {ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}}, {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}}; -static const std::map g_dump_data_type_map = { +std::map g_dump_data_type_map = { // key:ge datatype,value:proto datatype {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, {ge::DT_FLOAT, ge::proto::DT_FLOAT}, diff --git a/src/ge/common/profiling/profiling_manager.cc b/src/ge/common/profiling/profiling_manager.cc index d02f7e8f..9492045c 100644 --- a/src/ge/common/profiling/profiling_manager.cc +++ b/src/ge/common/profiling/profiling_manager.cc @@ -51,12 +51,13 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager &ProfilingMana return profiling_manager; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options, - bool convert_2_phy_device_id) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status 
ProfilingManager::Init(const Options &options) { #ifdef DAVINCI_SUPPORT_PROFILING vector().swap(device_id_); job_id_ = options.job_id; + GELOGI("ProfilingManager::Init job_id:%s", job_id_.c_str()); + Status ret; if (!recv_profiling_config_.empty()) { GELOGI("Profiling json config from acl:%s", recv_profiling_config_.c_str()); @@ -64,18 +65,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In } else { ret = InitFromOptions(options); if (ret == SUCCESS && is_load_profiling_) { - // profiling need phy device id - if (!convert_2_phy_device_id) { - device_id_.push_back(options.device_id); - } else { - uint32_t phy_device_id = 0; - rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast(options.device_id), &phy_device_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); - return FAILED; - } - device_id_.push_back(phy_device_id); - } + device_id_.push_back(options.device_id); } } if (ret != SUCCESS) { @@ -557,25 +547,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr return; } GELOGI("current logic_device_id:%d", logic_device_id); - - uint32_t phy_device_id = 0; - rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); - return; - } - GELOGI("current phy_device_id:%d", phy_device_id); if (!is_acl_api_mode_) { - auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id); + auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id); if (ret == device_id_.end()) { GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); return; } } GELOGI("start ProfilingTaskDescInfo."); - ProfilingTaskDescInfo(task_desc_info, phy_device_id); + ProfilingTaskDescInfo(task_desc_info, logic_device_id); GELOGI("start ProfilingGraphDescInfo."); - 
ProfilingGraphDescInfo(compute_graph_desc_info, phy_device_id); + ProfilingGraphDescInfo(compute_graph_desc_info, logic_device_id); GELOGI("Report profiling data for GE end."); #endif } diff --git a/src/ge/common/profiling/profiling_manager.h b/src/ge/common/profiling/profiling_manager.h index f4249451..a030efd3 100644 --- a/src/ge/common/profiling/profiling_manager.h +++ b/src/ge/common/profiling/profiling_manager.h @@ -69,7 +69,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { ProfilingManager(); virtual ~ProfilingManager(); static ProfilingManager &Instance(); - ge::Status Init(const Options &options, bool convert_2_phy_device_id = false); + ge::Status Init(const Options &options); ge::Status InitFromOptions(const Options &options); ge::Status InitFromAclCfg(const std::string &config); ge::Status StartProfiling(int32_t iter, int32_t device_id); diff --git a/src/ge/common/util.cc b/src/ge/common/util.cc index ce5aa57e..4adf3ebd 100644 --- a/src/ge/common/util.cc +++ b/src/ge/common/util.cc @@ -472,7 +472,7 @@ FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::str return true; } - ret = regexec(®, str.c_str(), 0, nullptr, 0); + ret = regexec(®, str.c_str(), 0, NULL, 0); if (ret) { regerror(ret, ®, ebuff, kMaxBuffSize); GELOGE(ge::PARAM_INVALID, "regexec failed, reason: %s", ebuff); diff --git a/src/ge/executor/CMakeLists.txt b/src/ge/executor/CMakeLists.txt index 7358585a..b68507bd 100755 --- a/src/ge/executor/CMakeLists.txt +++ b/src/ge/executor/CMakeLists.txt @@ -120,6 +120,7 @@ target_link_libraries(ge_executor ${mmpa} ${msprof} ${error_manager} + ${ascend_hal} rt dl) diff --git a/src/ge/executor/module.mk b/src/ge/executor/module.mk index bb642da9..1c3efe4c 100644 --- a/src/ge/executor/module.mk +++ b/src/ge/executor/module.mk @@ -89,6 +89,7 @@ local_ge_executor_shared_library := \ libregister \ libmsprof \ liberror_manager \ + libascend_hal local_ge_executor_ldflags := -lrt -ldl \ @@ -104,6 +105,7 @@ 
LOCAL_SRC_FILES := $(local_ge_executor_src_files) LOCAL_C_INCLUDES := $(local_ge_executor_c_include) LOCAL_SHARED_LIBRARIES := $(local_ge_executor_shared_library) +LOCAL_STATIC_LIBRARIES := libmsprofiler ifeq ($(device_os),android) LOCAL_LDFLAGS += -ldl LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog @@ -140,6 +142,9 @@ LOCAL_SHARED_LIBRARIES := \ libregister \ libmsprof \ liberror_manager \ + stub/libascend_hal + +LOCAL_STATIC_LIBRARIES := libmsprofiler LOCAL_LDFLAGS += $(local_ge_executor_ldflags) diff --git a/src/ge/ge_inference.mk b/src/ge/ge_inference.mk index 232e79ec..621e42c5 100644 --- a/src/ge/ge_inference.mk +++ b/src/ge/ge_inference.mk @@ -355,7 +355,7 @@ LOCAL_MODULE := libge_compiler LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 # from ome_inference.mk -LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP +LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE ifeq ($(DEBUG), 1) LOCAL_CFLAGS += -g -O0 endif @@ -418,7 +418,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := libge_compiler LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY -DNONSUPPORT_SAVE_TO_FILE LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP +LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE LOCAL_CFLAGS += -DOMG_DEVICE_VERSION LOCAL_CFLAGS += -O2 LOCAL_MODULE_CLASS := SHARED_LIBRARIES diff --git a/src/ge/ge_local_engine/CMakeLists.txt b/src/ge/ge_local_engine/CMakeLists.txt index e685c301..bcbc3e4c 100755 --- a/src/ge/ge_local_engine/CMakeLists.txt +++ b/src/ge/ge_local_engine/CMakeLists.txt @@ -42,7 +42,7 @@ include_directories(${CMAKE_BINARY_DIR}/proto/ge) ######### libge_local_engine.so ############# add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) -target_compile_definitions(ge_local_engine PRIVATE Werror) +target_compile_definitions(ge_local_engine PRIVATE Werror 
COMPILE_OMG_PACKAGE) target_link_libraries(ge_local_engine graph ${PROTOBUF_LIBRARY} diff --git a/src/ge/ge_local_engine/module.mk b/src/ge/ge_local_engine/module.mk index ee6b15c1..3307f780 100644 --- a/src/ge/ge_local_engine/module.mk +++ b/src/ge/ge_local_engine/module.mk @@ -42,7 +42,7 @@ include ${BUILD_HOST_SHARED_LIBRARY} include $(CLEAR_VARS) LOCAL_MODULE := atclib/libge_local_engine LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE LOCAL_LDFLAGS := LOCAL_STATIC_LIBRARIES := diff --git a/src/ge/ge_runner.mk b/src/ge/ge_runner.mk index 04182070..956bab0b 100644 --- a/src/ge/ge_runner.mk +++ b/src/ge/ge_runner.mk @@ -356,6 +356,7 @@ LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) LOCAL_STATIC_LIBRARIES := libge_memory \ libadump_server \ + libmsprofiler \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ diff --git a/src/ge/generator/ge_generator.cc b/src/ge/generator/ge_generator.cc index edd7a155..bef93333 100644 --- a/src/ge/generator/ge_generator.cc +++ b/src/ge/generator/ge_generator.cc @@ -136,6 +136,13 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen bool attr) { GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); + + auto format = tensor.GetFormat(); + auto data_type = tensor.GetDataType(); + if (format == FORMAT_RESERVED && data_type == DT_UNDEFINED) { + return SUCCESS; + } + string op_type; if (!AttrUtils::GetStr(tensor, kAttrOpType, op_type) || op_type.empty()) { op_type = DATA; @@ -521,8 +528,8 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, bool is_offline) { GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); - if (!inputs.empty() && (inputs.size() != op_desc->GetInputsSize())) { - GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetInputsSize()); + if (!inputs.empty() && 
(inputs.size() != op_desc->GetAllInputsSize())) { + GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetAllInputsSize()); return PARAM_INVALID; } if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) { diff --git a/src/ge/graph/build/memory/graph_mem_assigner.cc b/src/ge/graph/build/memory/graph_mem_assigner.cc index 1518714f..1cdb2efa 100644 --- a/src/ge/graph/build/memory/graph_mem_assigner.cc +++ b/src/ge/graph/build/memory/graph_mem_assigner.cc @@ -322,11 +322,19 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { GELOGE(ge::FAILED, "There is an atomic conflict between the current node and the peer out node, not supported!"); return ge::FAILED; - } else if (is_loop_graph) { - GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, mem_clean_start)); - } else { - GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {mem_clean_start}, {mem_clean_size}), - "SetAtomicCleanAttr failed."); + } + + const auto &in_control_anchor = node->GetInControlAnchor(); + GE_CHECK_NOTNULL(in_control_anchor); + for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { + auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); + if (peer_out_node->GetType() == ATOMICADDRCLEAN) { + ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}); + if (ret != SUCCESS) { + GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str()); + return ret; + } + } } } } @@ -840,68 +848,37 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map(memory_offset_[0].mem_offset_); - GELOGI("Begin to reAssign atomic memory, atomic initial address mem_offset = %zu!", memory_offset_[0].mem_offset_); - - vector connect_netoutput_nodes; - for (auto &node : compute_graph_->GetAllNodes()) { - auto node_op_desc = node->GetOpDesc(); - if (node_op_desc == nullptr) { - continue; - } - - bool is_atomic = false; - // If GetBool fail, is_atomic is false. 
- (void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); - if (!is_atomic) { - continue; - } - - bool is_ref = false; - // If GetBool fail, is_ref is false. - (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_REFERENCE, is_ref); - if (is_ref) { - GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and ref attribute.", - node_op_desc->GetName().c_str()); - return ge::PARAM_INVALID; - } - - vector is_connect_netoutput; - // If GetBool fail, attr is_connect_netoutput is an empty vector. - (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput); - if (!is_connect_netoutput.empty()) { - connect_netoutput_nodes.emplace_back(node); - continue; - } + map> normal_atomic_and_clean_nodes_map; + vector connecting_output_atomic_nodes; + Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes); + if (status != SUCCESS) { + GELOGE(status, "Failed to filter atomic nodes for memory assignment."); + return status; + } - // Atomic op memory start addr of loop graph - int64_t loop_graph_atomic_mem_start = static_cast(memory_offset_[0].mem_offset_); - vector mem_offset_end; - if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) { - GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str()); - return FAILED; - } + for (auto &iter : normal_atomic_and_clean_nodes_map) { + int64_t atomic_mem_start = static_cast(memory_offset_[0].mem_offset_); + GELOGD("Begin to reAssign atomic memory, atomic address memory start = %ld", atomic_mem_start); - /// In networks with loop op, atomic op uses atomic_addr_clean op independently, - /// so we need to set the attr separately. 
- if (is_loop_graph) { - GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, loop_graph_atomic_mem_start)); + for (auto &atomic_node : iter.second) { + vector mem_offset_end; + status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end); + if (status != SUCCESS) { + GELOGE(status, "Assign atomic output and workspace memory failed, node name is %s.", + atomic_node->GetName().c_str()); + return status; + } } - } - // In networks without loop op, the same atomic addr clean op is used for atomic op - if (!is_loop_graph) { - // Set the address attr of atomic clean operator - int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; - if (atomic_mem_size != 0) { - GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {atomic_mem_start}, {atomic_mem_size}), - "SetAtomicCleanAttr failed."); + int64_t atomic_mem_size = static_cast(memory_offset_[0].mem_offset_) - atomic_mem_start; + status = SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}); + if (status != SUCCESS) { + GELOGE(status, "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); + return status; } } - if (AssignConnectNetOutputAtomicMemory(connect_netoutput_nodes) != SUCCESS) { + if (AssignConnectNetOutputAtomicMemory(connecting_output_atomic_nodes) != SUCCESS) { GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput."); return FAILED; } @@ -909,6 +886,55 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { return SUCCESS; } +Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(map> &normal_atomic_nodes_map, + vector &connecting_output_atomic_nodes) { + GE_CHECK_NOTNULL(compute_graph_); + for (const auto &node : compute_graph_->GetAllNodes()) { + if (node->GetType() == ATOMICADDRCLEAN) { + vector tmp_normal_atomic_nodes; + const auto &out_control_anchor = node->GetOutControlAnchor(); + GE_CHECK_NOTNULL(out_control_anchor); + for (const auto &peer_in_control_anchor : 
out_control_anchor->GetPeerInControlAnchors()) { + if (peer_in_control_anchor != nullptr) { + auto peer_in_node = peer_in_control_anchor->GetOwnerNode(); + auto peer_in_node_desc = peer_in_node->GetOpDesc(); + if (peer_in_node_desc != nullptr) { + bool is_atomic_node = false; + // If GetBool fail, is_atomic_node is false. + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node); + if (is_atomic_node) { + bool is_reference = false; + // If GetBool fail, is_reference is false. + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference); + if (is_reference) { + GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and is_reference attribute.", + peer_in_node_desc->GetName().c_str()); + return ge::PARAM_INVALID; + } + + vector is_connecting_output; + // If GetBool fail, attr is_connecting_output is an empty vector. + (void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output); + if (is_connecting_output.empty()) { + tmp_normal_atomic_nodes.emplace_back(peer_in_node); + continue; + } + connecting_output_atomic_nodes.emplace_back(peer_in_node); + tmp_normal_atomic_nodes.clear(); + break; + } + } + } + } + + if (!tmp_normal_atomic_nodes.empty()) { + normal_atomic_nodes_map[node] = tmp_normal_atomic_nodes; + } + } + } + return SUCCESS; +} + Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node, vector &mem_offset_end) { auto node_op_desc = node->GetOpDesc(); @@ -1331,6 +1357,7 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< vector memory_type; auto tmp_op_desc = node->GetOpDesc(); origin_input_list = tmp_op_desc->GetInputOffset(); + int64_t valid_input_index = 0; bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type); for (const auto &anchor : node->GetAllInDataAnchors()) { vector output_list; @@ -1344,8 +1371,9 @@ ge::Status 
GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc(); GE_CHECK_NOTNULL(last_peer_out_op_desc); output_list = last_peer_out_op_desc->GetOutputOffset(); - if (output_list.size() > static_cast(peer_out_anchor->GetIdx())) { - auto input_index = anchor->GetIdx(); + auto out_index = static_cast(peer_out_anchor->GetIdx()); + if (output_list.size() > static_cast(out_index)) { + int64_t input_offset = output_list.at(out_index); if (has_mem_type_attr) { auto input_size = tmp_op_desc->GetInputsSize(); auto ori_input_offset_list_size = origin_input_list.size(); @@ -1359,26 +1387,21 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< } // not hbm keep orignal inputoffest // hbm inputoffset = original inputoffset + outputoffset - input_list.emplace_back(memory_type[input_index] == RT_MEMORY_L1 - ? origin_input_list[input_index] - : origin_input_list[input_index] + output_list.at(peer_out_anchor->GetIdx())); - GELOGI("fuison: node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", - tmp_op_desc->GetName().c_str(), input_index, - peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), - input_list.back()); - } else { - int64_t output_offset = output_list.at(peer_out_anchor->GetIdx()); - const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode()); - if (in_node->GetType() == CONSTANT) { - GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(input_index); - GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, output_offset)); - } - - GELOGI("node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", tmp_op_desc->GetName().c_str(), - input_index, peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), - output_offset); - input_list.emplace_back(output_offset); + input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1 + ? 
origin_input_list[valid_input_index] + : origin_input_list[valid_input_index] + output_list.at(out_index)); + } + const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode()); + if (in_node->GetType() == CONSTANT) { + GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast(anchor->GetIdx())); + GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset)); } + + GELOGI("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", + has_mem_type_attr == true ? "Fusion" : "", tmp_op_desc->GetName().c_str(), valid_input_index, + peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), out_index, input_offset); + input_list.emplace_back(input_offset); + valid_input_index++; } } return ge::SUCCESS; @@ -1473,125 +1496,49 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in return SUCCESS; } -Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start) { - // set the address attr of atomic clean operator for loop graph - int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; - GELOGI("SetLoopGraphAtomicAttr beign, atomic_addr_clean start size is %ld, mem_size is %ld, mem_offset is %zu.", - atomic_mem_start, atomic_mem_size, memory_offset_[0].mem_offset_); - const auto &in_control_anchor = node->GetInControlAnchor(); - if (atomic_mem_size != 0 && in_control_anchor != nullptr) { - for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { - if (peer_out_control_anchor == nullptr) { - continue; - } - auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); - auto peer_out_node_desc = peer_out_node->GetOpDesc(); - if (peer_out_node_desc == nullptr) { - continue; - } - - GELOGD("SetLoopGraphAtomicAttr, node is %s, op type is %s.", peer_out_node_desc->GetName().c_str(), - peer_out_node_desc->GetType().c_str()); - - if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { - 
GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(peer_out_node, {atomic_mem_start}, {atomic_mem_size}), - GELOGE(FAILED, "SetAtomicCleanAttr failed."); - return FAILED); - } - } - } - return SUCCESS; -} - -ge::Status GraphMemoryAssigner::IsIndependentAtomicClean(const ge::NodePtr &node, - bool &is_independent_atomic_clean_node) { - GE_CHECK_NOTNULL(node); - const auto &out_control_anchor = node->GetOutControlAnchor(); - GE_CHECK_NOTNULL(out_control_anchor); - for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) { - if (peer_in_control_anchor != nullptr) { - auto peer_in_node = peer_in_control_anchor->GetOwnerNode(); - auto peer_in_node_desc = peer_in_node->GetOpDesc(); - if (peer_in_node_desc != nullptr) { - bool is_atomic_node = false; - // If GetBool fail, is_atomic_node is false. - (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node); - if (is_atomic_node) { - vector is_connect_netoutput; - // If GetBool fail, attr is_connect_netoutput is an empty vector. 
- (void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput); - if (!is_connect_netoutput.empty()) { - GELOGD("Peer in node %s is independent atomic clean node", peer_in_node->GetName().c_str()); - is_independent_atomic_clean_node = true; - break; - } - } - } - } - } - - return SUCCESS; -} - -ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, const vector &atomic_mem_start, +ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector &atomic_mem_start, const vector &atomic_mem_size) { - for (ge::NodePtr &node : compute_graph_->GetAllNodes()) { - auto node_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); - - bool is_valid_atomic_clean_node = (n != nullptr) && (node->GetName() == n->GetName()); - - if (((n == nullptr) && (node_op_desc->GetType() == ATOMICADDRCLEAN))) { - bool is_independent_atomic_clean = false; - if (IsIndependentAtomicClean(node, is_independent_atomic_clean) != SUCCESS) { - GELOGE(FAILED, "Failed to determine the connection relationship of atomic addr clean node."); - return PARAM_INVALID; - } - - is_valid_atomic_clean_node = is_valid_atomic_clean_node || (!is_independent_atomic_clean); + auto node_op_desc = node->GetOpDesc(); + if (node_op_desc != nullptr) { + GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str()); + vector workspace_vector = node_op_desc->GetWorkspace(); + vector workspace_byte_vector = node_op_desc->GetWorkspaceBytes(); + workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); + workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); + node_op_desc->SetWorkspace(workspace_vector); + node_op_desc->SetWorkspaceBytes(workspace_byte_vector); + + std::vector mem_start_vector; + // If GetListInt fail, mem_start_vector is empty. 
+ (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); + mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), + GELOGE(FAILED, "SetListInt failed."); + return FAILED); + + std::vector mem_size_vector; + // If GetListInt fail, mem_size_vector is empty. + (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); + mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), + GELOGE(FAILED, "SetListInt failed."); + return FAILED); + + std::stringstream ss; + for (auto iter : atomic_mem_start) { + ss << iter << " "; } - - if (is_valid_atomic_clean_node) { - GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str()); - vector workspace_vector = node_op_desc->GetWorkspace(); - vector workspace_byte_vector = node_op_desc->GetWorkspaceBytes(); - workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); - workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); - node_op_desc->SetWorkspace(workspace_vector); - node_op_desc->SetWorkspaceBytes(workspace_byte_vector); - - std::vector mem_start_vector; - // If GetListInt fail, mem_start_vector is empty. - (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); - mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); - GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), - GELOGE(FAILED, "SetListInt failed."); - return FAILED); - - std::vector mem_size_vector; - // If GetListInt fail, mem_size_vector is empty. 
- (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); - mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); - GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), - GELOGE(FAILED, "SetListInt failed."); - return FAILED); - - std::stringstream ss; - for (auto iter : atomic_mem_start) { - ss << iter << " "; - } - string atomic_mem_start_str = ss.str(); - ss.clear(); - ss.str(""); - for (auto iter : atomic_mem_size) { - ss << iter << " "; - } - string atomic_mem_size_str = ss.str(); - - GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]", - node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), - atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId()); + string atomic_mem_start_str = ss.str(); + ss.clear(); + ss.str(""); + for (auto iter : atomic_mem_size) { + ss << iter << " "; } + string atomic_mem_size_str = ss.str(); + + GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]", + node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), + atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId()); } return SUCCESS; } diff --git a/src/ge/graph/build/memory/graph_mem_assigner.h b/src/ge/graph/build/memory/graph_mem_assigner.h index e1e408be..201e6d01 100644 --- a/src/ge/graph/build/memory/graph_mem_assigner.h +++ b/src/ge/graph/build/memory/graph_mem_assigner.h @@ -135,6 +135,9 @@ class GraphMemoryAssigner { ge::Status ReAssignAtomicMemory(bool is_loop_graph); + ge::Status FilterAtomicNodesForMemoryAssign(std::map> &normal_atomic_nodes_map, + std::vector &connecting_output_atomic_nodes); + ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, int64_t 
&continuous_mem_size); @@ -165,14 +168,8 @@ class GraphMemoryAssigner { ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, const std::vector &mem_offset_end); - /// - /// @brief set loop graph atomic attr - /// @param node, atomic memory assignment start offset - /// @param atomic_mem_start: atomic op memory start address - /// - ge::Status SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start); - ge::Status SetAtomicCleanAttr(const ge::NodePtr &n, const std::vector &atomic_mem_start, + ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector &atomic_mem_start, const std::vector &atomic_mem_size); ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); diff --git a/src/ge/graph/load/new_model_manager/data_dumper.cc b/src/ge/graph/load/new_model_manager/data_dumper.cc index e4e3a63f..c6283d92 100644 --- a/src/ge/graph/load/new_model_manager/data_dumper.cc +++ b/src/ge/graph/load/new_model_manager/data_dumper.cc @@ -695,11 +695,7 @@ Status DataDumper::LoadDumpInfo() { } if (dump_properties_.GetDumpMode() == kDumpInput) { if (op_iter.is_task) { - Status ret = DumpInput(op_iter, task); - if (ret != SUCCESS) { - GELOGE(ret, "Dump input failed"); - return ret; - } + GE_CHK_STATUS_RET(DumpInput(op_iter, task), "Dump input failed"); } op_mapping_info.mutable_task()->Add(std::move(task)); continue; @@ -726,7 +722,7 @@ Status DataDumper::LoadDumpInfo() { SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info); - if (!op_list_.empty() || is_op_debug_) { + if (!op_list_.empty() || is_op_debug_ || is_end_graph_) { auto ret = ExecuteLoadDumpInfo(op_mapping_info); if (ret != SUCCESS) { GELOGE(ret, "Execute load dump info failed"); @@ -740,7 +736,6 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, aicpu::dump::OpMappingInfo &op_mapping_info) { if (dump_properties_.GetDumpMode() == kDumpOutput || 
dump_properties_.GetDumpMode() == kDumpInput || dump_properties_.GetDumpMode() == kDumpAll) { - GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); aicpu::dump::Task task; task.set_end_graph(true); task.set_task_id(end_graph_task_id_); @@ -748,6 +743,14 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, task.mutable_op()->set_op_name(NODE_NAME_END_GRAPH); task.mutable_op()->set_op_type(ENDGRAPH); op_mapping_info.mutable_task()->Add(std::move(task)); + + is_end_graph_ = true; + if (op_mapping_info.model_name_param_case() == aicpu::dump::OpMappingInfo::kModelName) { + GELOGI("Add end_graph_info to aicpu, model_name is %s, task_id is %u, stream_id is %u", + op_mapping_info.model_name().c_str(), end_graph_task_id_, end_graph_stream_id_); + return; + } + GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); } } diff --git a/src/ge/graph/load/new_model_manager/data_dumper.h b/src/ge/graph/load/new_model_manager/data_dumper.h index 0a1c2274..30218416 100644 --- a/src/ge/graph/load/new_model_manager/data_dumper.h +++ b/src/ge/graph/load/new_model_manager/data_dumper.h @@ -116,6 +116,7 @@ class DataDumper { std::vector op_list_; uint32_t end_graph_task_id_ = 0; uint32_t end_graph_stream_id_ = 0; + bool is_end_graph_ = false; std::multimap input_map_; bool load_flag_; uint32_t device_id_; diff --git a/src/ge/graph/load/new_model_manager/davinci_model.cc b/src/ge/graph/load/new_model_manager/davinci_model.cc index 81eb4bc9..50867782 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.cc +++ b/src/ge/graph/load/new_model_manager/davinci_model.cc @@ -1928,13 +1928,7 @@ Status DavinciModel::SinkModelProfile() { name = name_; } size_t name_len = name.size(); - // phy device id - uint32_t phy_device_id = 0; - rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - 
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); - return FAILED); - reporter_data.deviceId = phy_device_id; + reporter_data.deviceId = device_id_; reporter_data.data = (unsigned char *)&name_len; reporter_data.dataLen = sizeof(int32_t); GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", @@ -2103,12 +2097,7 @@ Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, return FAILED, "Sink model tag memcpy error."); // device id - uint32_t phy_device_id = 0; - rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); - return FAILED); - reporter_data.deviceId = phy_device_id; + reporter_data.deviceId = device_id_; // Model Header string name; diff --git a/src/ge/graph/load/new_model_manager/model_manager.cc b/src/ge/graph/load/new_model_manager/model_manager.cc index f6995052..4a596738 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.cc +++ b/src/ge/graph/load/new_model_manager/model_manager.cc @@ -236,7 +236,6 @@ ModelManager::~ModelManager() { std::lock_guard lock(map_mutex_); model_map_.clear(); model_aicpu_kernel_.clear(); - cust_aicpu_so_.clear(); GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0))); } @@ -400,6 +399,7 @@ Status ModelManager::Unload(uint32_t model_id) { } std::lock_guard lock(exeception_infos_mutex_); exception_infos_.clear(); + cust_aicpu_so_.clear(); return SUCCESS; } diff --git a/src/ge/graph/load/new_model_manager/model_utils.cc b/src/ge/graph/load/new_model_manager/model_utils.cc index 9cbb684f..2bb111f3 100644 --- a/src/ge/graph/load/new_model_manager/model_utils.cc +++ b/src/ge/graph/load/new_model_manager/model_utils.cc @@ -328,15 +328,14 @@ 
vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co op_desc->GetName().c_str(), v_memory_type.size(), inputs_size); return v_input_data_addr; } - for (size_t i = 0; i < inputs_size; ++i) { + for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { + const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(static_cast(i)); + if (tensor_desc == nullptr) { + GELOGD("Op: %s, Index: %zu, has no input", op_desc->GetName().c_str(), i); + continue; + } if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) { // TBE: add weights address to input - const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i); - if (tensor_desc == nullptr) { - GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); - continue; - } - int64_t tensor_size = 0; GE_CHK_STATUS(TensorUtils::GetSize(*tensor_desc, tensor_size)); if (tensor_size) { diff --git a/src/ge/graph/passes/attach_stream_label_pass.cc b/src/ge/graph/passes/attach_stream_label_pass.cc index b8065325..6b718418 100644 --- a/src/ge/graph/passes/attach_stream_label_pass.cc +++ b/src/ge/graph/passes/attach_stream_label_pass.cc @@ -89,16 +89,13 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { nodes.push(node); static const std::set end_type_set = {STREAMSWITCH, STREAMMERGE, MERGE}; - bool merge_flag = false; - bool exit_flag = false; - bool net_output_flag = false; while (!nodes.empty()) { NodePtr cur_node = nodes.top(); nodes.pop(); if (visited.count(cur_node) > 0) { continue; } - if (AttachFlag(cur_node, stream_label, merge_flag, exit_flag, net_output_flag) != SUCCESS) { + if (AttachFlag(cur_node, stream_label) != SUCCESS) { GELOGE(FAILED, "Attach flag for node %s failed.", cur_node->GetName().c_str()); return FAILED; } @@ -122,12 +119,6 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { GE_CHK_STATUS_RET(SetActiveLabelList(node, {stream_label}), "set active_label_list failed."); } - bool attach_flag = 
(merge_flag || exit_flag) && net_output_flag; - if (attach_flag) { - GELOGI("No need to keep on attaching label."); - return SUCCESS; - } - for (const NodePtr &tmp_node : branch_nodes) { GELOGD("Attach label %s to node: %s.", stream_label.c_str(), tmp_node->GetName().c_str()); GE_CHK_STATUS_RET(SetStreamLabel(tmp_node, stream_label), "Set stream label failed."); @@ -140,13 +131,9 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { /// @brief attach flag /// @param [in] node /// @param [out] stream_label -/// @param [out] merge_flag -/// @param [out] exit_flag -/// @param [out] net_output_flag /// @return Status /// -Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag, - bool &exit_flag, bool &net_output_flag) { +Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &stream_label) { const std::string &type = node->GetType(); if (type == STREAMSWITCH) { if (node->GetInDataNodes().empty()) { @@ -164,12 +151,8 @@ Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &strea } else if (type == STREAMMERGE) { stream_label = node->GetName(); GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); - merge_flag = true; } else if ((type == EXIT) || (type == REFEXIT)) { GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); - exit_flag = true; - } else if (type == NETOUTPUT) { - net_output_flag = true; } return SUCCESS; diff --git a/src/ge/graph/passes/attach_stream_label_pass.h b/src/ge/graph/passes/attach_stream_label_pass.h index 5820480d..28e828b5 100644 --- a/src/ge/graph/passes/attach_stream_label_pass.h +++ b/src/ge/graph/passes/attach_stream_label_pass.h @@ -50,13 +50,9 @@ class AttachStreamLabelPass : public GraphPass { /// @brief attach flag /// @param [in] node /// @param [out] stream_label - /// @param [out] merge_flag - /// @param [out] exit_flag - /// @param [out] net_output_flag /// @return 
Status /// - static Status AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag, bool &exit_flag, - bool &net_output_flag); + static Status AttachFlag(const NodePtr &node, std::string &stream_label); /// /// @brief Update stream_label for loop_branch diff --git a/src/ge/graph/passes/enter_pass.cc b/src/ge/graph/passes/enter_pass.cc index 84621689..ad3d78fc 100644 --- a/src/ge/graph/passes/enter_pass.cc +++ b/src/ge/graph/passes/enter_pass.cc @@ -20,13 +20,14 @@ #include "framework/common/debug/log.h" #include "graph/utils/graph_utils.h" +namespace { +const size_t kOutNodesNum = 1; +} + namespace ge { Status EnterPass::Run(NodePtr &node) { GELOGD("EnterPass running"); - if (node == nullptr) { - GELOGE(PARAM_INVALID, "param [node] must not be null."); - return PARAM_INVALID; - } + GE_CHECK_NOTNULL(node); if ((node->GetType() != ENTER) && (node->GetType() != REFENTER)) { return SUCCESS; @@ -38,18 +39,17 @@ Status EnterPass::Run(NodePtr &node) { return PARAM_INVALID; } NodePtr in_node = node->GetInDataNodes().at(0); - if (in_node == nullptr) { - GELOGE(PARAM_INVALID, "param [in_node] must not be null"); - return PARAM_INVALID; - } + GE_CHECK_NOTNULL(in_node); if ((in_node->GetType() != CONSTANT) && (in_node->GetType() != CONSTANTOP)) { return SUCCESS; } - bool need_remove_flag = - in_node->GetInControlNodes().empty() && node->GetInControlNodes().empty() && node->GetOutDataNodes().empty(); - if (need_remove_flag) { + bool need_remove_flag = in_node->GetInControlNodes().empty() && node->GetInControlNodes().empty(); + if (!need_remove_flag) { + return SUCCESS; + } + if (node->GetOutDataNodes().empty()) { for (auto &out_ctrl_node : node->GetOutControlNodes()) { if (out_ctrl_node == nullptr) { continue; @@ -60,9 +60,47 @@ Status EnterPass::Run(NodePtr &node) { return FAILED; } } + } else { + if (OptimizeEnter(node, in_node) != SUCCESS) { + GELOGE(FAILED, "Optimize enter node[%s] failed.", node->GetName().c_str()); + return FAILED; + } } 
GELOGD("EnterPass success"); return SUCCESS; } + +Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) { + auto out_nodes_of_in_node = in_node->GetOutAllNodes(); + if (out_nodes_of_in_node.size() != kOutNodesNum) { + return SUCCESS; + } + + if (!node->GetOutControlNodes().empty()) { + return SUCCESS; + } + + for (const auto &out_node : node->GetOutDataNodes()) { + GE_CHECK_NOTNULL(out_node); + if (out_node->GetType() == MERGE) { + return SUCCESS; + } + } + + GE_CHECK_NOTNULL(in_node->GetOutDataAnchor(0)); + GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0))); + auto out_data_anchor = node->GetOutDataAnchor(0); + GE_CHECK_NOTNULL(out_data_anchor); + for (auto peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { + GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor)); + GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor)); + } + + auto graph = node->GetOwnerComputeGraph(); + GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, node)) + AddRePassNodesWithInOut(in_node); + + return SUCCESS; +} } // namespace ge diff --git a/src/ge/graph/passes/enter_pass.h b/src/ge/graph/passes/enter_pass.h index 04ac62ee..73702c38 100644 --- a/src/ge/graph/passes/enter_pass.h +++ b/src/ge/graph/passes/enter_pass.h @@ -23,6 +23,9 @@ namespace ge { class EnterPass : public BaseNodePass { public: Status Run(NodePtr &node) override; + + private: + Status OptimizeEnter(NodePtr &node, NodePtr &in_node); }; } // namespace ge #endif // GE_GRAPH_PASSES_ENTER_PASS_H_ diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.cc b/src/ge/graph/preprocess/multi_batch_copy_graph.cc index 331d9c31..336527fb 100644 --- a/src/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/src/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -41,7 +41,6 @@ #include "inc/pass_manager.h" #include "graph/common/local_context.h" -using std::map; using std::set; using std::string; using std::vector; @@ -266,24 +265,27 @@ 
Status MultiBatchGraphCopyer::Init() { } Status MultiBatchGraphCopyer::LabelStatus() { - map> frame_enters; - InitStatus(frame_enters); - + for (const auto &data : origin_data_nodes_) { + auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); + if (!IsAllDimsPositive(data_shape.GetDims())) { + origin_nodes_status_[data.get()] = kNodeInBatchBranch; + } + } bool changed = true; // If anyone of in node is kNodeInBatchBranch, it is also kNodeInBatchBranch while (changed) { changed = false; for (const auto &node : origin_all_nodes_) { + auto iter = origin_nodes_status_.find(node.get()); + if (iter != origin_nodes_status_.end()) { + continue; + } for (auto &in_node : node->GetInAllNodes()) { bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() && origin_nodes_status_[in_node.get()] == kNodeInBatchBranch; if (is_in_batch) { - if (origin_nodes_status_.find(node.get()) == origin_nodes_status_.end() || - origin_nodes_status_[node.get()] != kNodeInBatchBranch) { - origin_nodes_status_[node.get()] = kNodeInBatchBranch; - ResetEnterStatus(frame_enters, node); - changed = true; - } + origin_nodes_status_[node.get()] = kNodeInBatchBranch; + changed = true; break; } } @@ -314,45 +316,6 @@ Status MultiBatchGraphCopyer::LabelStatus() { return SUCCESS; } -void MultiBatchGraphCopyer::InitStatus(map> &frame_enters) { - for (const auto &node : origin_all_nodes_) { - if (node->GetType() != ENTER && node->GetType() != REFENTER) { - continue; - } - auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - continue; - } - string frame_name; - if (AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) { - frame_enters[frame_name].emplace_back(node); - } - } - - for (const auto &data : origin_data_nodes_) { - auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); - if (!IsAllDimsPositive(data_shape.GetDims())) { - origin_nodes_status_[data.get()] = kNodeInBatchBranch; - } - } -} - -void 
MultiBatchGraphCopyer::ResetEnterStatus(map> &frame_enters, const NodePtr &node) { - if (node->GetType() != ENTER && node->GetType() != REFENTER) { - return; - } - - for (const auto &frame_enter : frame_enters) { - auto &enters = frame_enter.second; - if (std::find(enters.begin(), enters.end(), node) != enters.end()) { - for (const auto &enter : enters) { - origin_nodes_status_[enter.get()] = kNodeInBatchBranch; - } - break; - } - } -} - Status MultiBatchGraphCopyer::CreateNewNodes() { shape_data_ = InsertShapeDataNode(); if (shape_data_ == nullptr) { @@ -1200,7 +1163,7 @@ void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node, s } } -// Connect NetOutput directly: DTS2020070612498 +// Connect NetOutput directly void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, const set &dynamic_output_index, vector &dynamic_output_dims) { GELOGD("Try get directly shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.h b/src/ge/graph/preprocess/multi_batch_copy_graph.h index f665b65e..062b98d2 100644 --- a/src/ge/graph/preprocess/multi_batch_copy_graph.h +++ b/src/ge/graph/preprocess/multi_batch_copy_graph.h @@ -68,8 +68,6 @@ class MultiBatchGraphCopyer { // label status for origin_all_nodes_ Status LabelStatus(); - void InitStatus(std::map> &frame_enters); - void ResetEnterStatus(std::map> &frame_enters, const NodePtr &node); // add nodes functions Status CreateNewNodes(); diff --git a/src/ge/host_cpu_engine/module.mk b/src/ge/host_cpu_engine/module.mk index 41de4503..e35c68c9 100644 --- a/src/ge/host_cpu_engine/module.mk +++ b/src/ge/host_cpu_engine/module.mk @@ -40,7 +40,7 @@ include ${BUILD_HOST_SHARED_LIBRARY} include $(CLEAR_VARS) LOCAL_MODULE := atclib/libhost_cpu_engine LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE LOCAL_LDFLAGS := LOCAL_STATIC_LIBRARIES := diff --git 
a/src/ge/init/gelib.cc b/src/ge/init/gelib.cc index ec56cc0a..e00268ea 100644 --- a/src/ge/init/gelib.cc +++ b/src/ge/init/gelib.cc @@ -165,8 +165,10 @@ Status GELib::SystemInitialize(const map &options) { } } - // In train and infer, profiling is always needed. InitOptions(options); + + // In train and infer, profiling is always needed. + InitProfiling(this->options_); auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); GE_IF_BOOL_EXEC(model_manager->EnableExceptionDump(options) != SUCCESS, @@ -176,21 +178,19 @@ Status GELib::SystemInitialize(const map &options) { // 2.`(!is_train_mode_) && (options_.device_id != kDefaultDeviceIdForInfer)` means case: online infer // these two case with logical device id if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { - InitProfiling(this->options_, true); status = InitSystemWithOptions(this->options_); } else { - InitProfiling(this->options_); status = InitSystemWithoutOptions(); } return status; } -void GELib::InitProfiling(Options &options, bool convert_2_phy_device_id) { +void GELib::InitProfiling(Options &options) { GELOGI("Init Profiling. 
session Id: %ld, device id:%d ", options.session_id, options.device_id); std::lock_guard lock(status_mutex_); GetContext().Init(); // Profiling init - if (ProfilingManager::Instance().Init(options, convert_2_phy_device_id) != SUCCESS) { + if (ProfilingManager::Instance().Init(options) != SUCCESS) { GELOGW("Profiling init failed."); } } diff --git a/src/ge/init/gelib.h b/src/ge/init/gelib.h index c8b3ff8a..b5621dfd 100644 --- a/src/ge/init/gelib.h +++ b/src/ge/init/gelib.h @@ -68,7 +68,7 @@ class GELib { // get incre build cache path const std::string &GetIncreBuildCachePath() const { return incre_build_cache_path_; } - void InitProfiling(Options &options, bool convert_2_phy_device_id = false); + void InitProfiling(Options &options); void ShutDownProfiling(); Status InitSystemWithoutOptions(); diff --git a/src/ge/ir_build/atc_ir_common.cc b/src/ge/ir_build/atc_ir_common.cc index 82ed40bd..1f8abf37 100644 --- a/src/ge/ir_build/atc_ir_common.cc +++ b/src/ge/ir_build/atc_ir_common.cc @@ -522,7 +522,7 @@ void PrintOptionMap(std::map &options, std::string tip for (auto iter = options.begin(); iter != options.end(); iter++) { std::string key = iter->first; std::string option_name = iter->second; - GELOGI("%s set successfully, key=%s, value=%s", tips.c_str(), key.c_str(), option_name.c_str()); + GELOGI("%s set successfully, option_key=%s, option_value=%s", tips.c_str(), key.c_str(), option_name.c_str()); } } diff --git a/src/ge/ir_build/ge_ir_build.cc b/src/ge/ir_build/ge_ir_build.cc index 90f7a8ca..86b304c1 100644 --- a/src/ge/ir_build/ge_ir_build.cc +++ b/src/ge/ir_build/ge_ir_build.cc @@ -96,6 +96,12 @@ static graphStatus CheckGlobalOptions(std::map &global return ge::GRAPH_PARAM_INVALID, "check optypelist_for_implmode and op_select_implmode failed!"); global_options[ge::ir_option::OP_SELECT_IMPL_MODE] = op_select_implmode; + // set precision mode default value + std::string precision_mode = global_options.find(ge::ir_option::PRECISION_MODE) == global_options.end() + ? 
"force_fp16" + : global_options[ge::ir_option::PRECISION_MODE]; + global_options[ge::ir_option::PRECISION_MODE] = precision_mode; + return GRAPH_SUCCESS; } diff --git a/src/ge/opskernel_manager/ops_kernel_manager.cc b/src/ge/opskernel_manager/ops_kernel_manager.cc index 51e8f438..11eb3061 100644 --- a/src/ge/opskernel_manager/ops_kernel_manager.cc +++ b/src/ge/opskernel_manager/ops_kernel_manager.cc @@ -175,25 +175,25 @@ Status OpsKernelManager::ParsePluginOptions(const map &options, } else if (flag == 1) { enable_flag = true; } else { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), - iter->second.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", + plugin_name.c_str(), iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } } catch (std::invalid_argument &) { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.feFlag, its value %s is invalid_argument, it must be 0 or 1.", + GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:ge.feFlag, its value %s is invalid_argument, it must be 0 or 1.", iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } catch (std::out_of_range &) { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.feFlag, its value %s is out of range, it must be 0 or 1.", + GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:ge.feFlag, its value %s is out of range, it must be 0 or 1.", iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } catch (...) 
{ - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), - iter->second.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", + plugin_name.c_str(), iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } } else { - GELOGI("Not find key %s, set to default value false.", plugin_name.c_str()); + GELOGI("Not find option_key %s, set to default value false.", plugin_name.c_str()); enable_flag = false; } diff --git a/src/ge/session/omg.cc b/src/ge/session/omg.cc index bcf42032..0fb342e1 100644 --- a/src/ge/session/omg.cc +++ b/src/ge/session/omg.cc @@ -618,11 +618,16 @@ Status ParseOutNodes(const string &out_nodes) { if (!out_nodes.empty()) { domi::GetContext().out_nodes_map.clear(); domi::GetContext().user_out_nodes.clear(); + domi::GetContext().user_out_nodes_top_vec.clear(); vector nodes_v = StringUtils::Split(out_nodes, ';'); for (const string &node : nodes_v) { vector key_value_v = StringUtils::Split(node, ':'); if (key_value_v.size() != 2) { // The size must be 2. 
+ if (key_value_v.size() == 1 && domi::GetContext().type == domi::CAFFE) { + domi::GetContext().user_out_nodes_top_vec.push_back(node); + continue; + } ErrorManager::GetInstance().ATCReportErrMessage( "E10001", {"parameter", "value", "reason"}, {"--out_nodes", node, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""}); @@ -632,7 +637,13 @@ Status ParseOutNodes(const string &out_nodes) { node.c_str()); return PARAM_INVALID; } - auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]); + if (!domi::GetContext().user_out_nodes_top_vec.empty()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, + {"--out_nodes", out_nodes, "is not all index or top_name"}); + GELOGE(PARAM_INVALID, "This out_nodes str must be all index or top_name, while the actual input is %s", + out_nodes.c_str()); + return PARAM_INVALID; + } // stoi: The method may throw an exception: invalid_argument/out_of_range if (!CheckDigitStr(key_value_v[1])) { ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, @@ -640,7 +651,10 @@ Status ParseOutNodes(const string &out_nodes) { GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s", out_nodes.c_str()); return PARAM_INVALID; } + + auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]); int32_t index = stoi(StringUtils::Trim(key_value_v[1])); + GELOGD("Get output info: node[%s] and index[%ld]", key_value_v[0].c_str(), index); if (iter != domi::GetContext().out_nodes_map.end()) { iter->second.emplace_back(index); } else { diff --git a/src/ge/single_op/single_op.cc b/src/ge/single_op/single_op.cc index 8e68208d..f59fb7bd 100644 --- a/src/ge/single_op/single_op.cc +++ b/src/ge/single_op/single_op.cc @@ -279,7 +279,7 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, con if (op_task_->GetOpTaskType() == OP_TASK_TBE) { return ExecuteTbeTask(input_desc, inputs, output_desc, outputs); } else if 
(op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { - return op_task_->LaunchKernel(input_desc, inputs, output_desc, outputs, stream_); + return op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_); } else { GELOGE(UNSUPPORTED, "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", op_task_->GetOpTaskType()); diff --git a/src/ge/single_op/task/build_task_utils.cc b/src/ge/single_op/task/build_task_utils.cc index 9e97ee57..268cbfd1 100644 --- a/src/ge/single_op/task/build_task_utils.cc +++ b/src/ge/single_op/task/build_task_utils.cc @@ -75,8 +75,11 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { // Conv2D IN[DT_FLOAT16 NC1HWC0[256, 128, 7, 7, 16],DT_FLOAT16 FRACTAL_Z[128, 32, 16, 16]] // OUT[DT_FLOAT16 NC1HWC0[256, 32, 7, 7, 16]] ss << op_type << " IN["; - for (uint32_t idx = 0; idx < op_desc->GetInputsSize(); idx++) { + for (uint32_t idx = 0; idx < op_desc->GetAllInputsSize(); idx++) { const GeTensorDescPtr &input = op_desc->MutableInputDesc(idx); + if (input == nullptr) { + continue; + } ss << TypeUtils::DataTypeToSerialString(input->GetDataType()) << " "; ss << TypeUtils::FormatToSerialString(input->GetFormat()); ss << VectorToString(input->GetShape().GetDims()); diff --git a/src/ge/single_op/task/op_task.cc b/src/ge/single_op/task/op_task.cc index 0c489aa4..78db835e 100644 --- a/src/ge/single_op/task/op_task.cc +++ b/src/ge/single_op/task/op_task.cc @@ -34,6 +34,11 @@ constexpr int kLaunchRetryTimes = 1000; constexpr int kSleepTime = 10; constexpr uint64_t kReleaseFlag = 1; constexpr int kCopyNum = 2; +void FreeHbm(void *var) { + if (var) { + (void)rtFree(var); + } +} } // namespace Status OpTask::OpenDump(const std::vector &io_addr, rtStream_t stream) { @@ -336,49 +341,23 @@ Status AiCpuBaseTask::UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensor } AiCpuTask::~AiCpuTask() { - if (args_ != nullptr) { - (void)rtFree(args_); - } - - if 
(io_addr_ != nullptr) { - (void)rtFree(io_addr_); - } - - if (dynamic_flag_ && workspace_addr_ != nullptr) { - (void)rtFree(workspace_addr_); - } - if (copy_workspace_buf_ != nullptr) { - (void)rtFree(copy_workspace_buf_); - } - - if (copy_ioaddr_dev_ != nullptr) { - (void)rtFree(copy_ioaddr_dev_); - } - - if (copy_input_release_flag_dev_ != nullptr) { - (void)rtFree(copy_input_release_flag_dev_); - } - - if (copy_input_data_size_dev_ != nullptr) { - (void)rtFree(copy_input_data_size_dev_); - } - - if (copy_input_src_dev_ != nullptr) { - (void)rtFree(copy_input_src_dev_); - } - - if (copy_input_dst_dev_ != nullptr) { - (void)rtFree(copy_input_dst_dev_); - } - - if (copy_task_args_buf_ != nullptr) { - (void)rtFree(copy_task_args_buf_); - } - + FreeHbm(args_); + FreeHbm(io_addr_); + if (dynamic_flag_) { + FreeHbm(workspace_addr_); + } + FreeHbm(copy_workspace_buf_); + FreeHbm(copy_ioaddr_dev_); + FreeHbm(copy_input_release_flag_dev_); + FreeHbm(copy_input_data_size_dev_); + FreeHbm(copy_input_src_dev_); + FreeHbm(copy_input_dst_dev_); + FreeHbm(copy_task_args_buf_); for (auto summary : output_summary_) { - if (summary != nullptr) { - (void)rtFree(summary); - } + FreeHbm(summary); + } + for (auto out_shape : out_shape_hbm_) { + FreeHbm(out_shape); } } @@ -405,7 +384,7 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { return SUCCESS; } -Status AiCpuTask::PrepareCopyInputs(vector &outputs, const std::vector &out_shape_hbm) { +Status AiCpuTask::PrepareCopyInputs(vector &outputs) { std::vector copy_input_release_flag; std::vector copy_input_data_size; std::vector copy_input_src; @@ -417,11 +396,15 @@ Status AiCpuTask::PrepareCopyInputs(vector &outputs, const std::vector 0) { + copy_input_data_size.emplace_back(output.length); + } else { + copy_input_data_size.emplace_back(summary.raw_data_size); + } copy_input_src.emplace_back(summary.raw_data_ptr); - copy_input_dst.emplace_back(reinterpret_cast(output)); + copy_input_dst.emplace_back(reinterpret_cast(output.data)); 
- const auto &shape_buffer = out_shape_hbm[i]; + const auto &shape_buffer = out_shape_hbm_[i]; copy_input_release_flag.emplace_back(kReleaseFlag); copy_input_data_size.emplace_back(summary.shape_data_size); copy_input_src.emplace_back(summary.shape_data_ptr); @@ -441,7 +424,7 @@ Status AiCpuTask::PrepareCopyInputs(vector &outputs, const std::vector &out_shape_hbm) { +Status AiCpuTask::ReadResultSummaryAndPrepareMemory() { for (size_t i = 0; i < num_outputs_; ++i) { auto &result_summary = output_summary_host_[i]; @@ -449,36 +432,39 @@ Status AiCpuTask::ReadResultSummaryAndPrepareMemory(std::vector &out_sha sizeof(aicpu::FWKAdapter::ResultSummary), RT_MEMCPY_DEVICE_TO_HOST)); auto shape_data_size = result_summary.shape_data_size; void *shape_buffer = nullptr; - GE_MAKE_GUARD_RTMEM(shape_buffer); - GE_CHK_RT_RET(rtMalloc(&shape_buffer, shape_data_size, RT_MEMORY_HBM)); - out_shape_hbm.emplace_back(shape_buffer); + if (shape_data_size > 0) { + GE_CHK_RT_RET(rtMalloc(&shape_buffer, shape_data_size, RT_MEMORY_HBM)); + } + out_shape_hbm_.emplace_back(shape_buffer); } return SUCCESS; } -Status AiCpuTask::CopyDataToHbm(vector &outputs, const std::vector &out_shape_hbm, rtStream_t stream) { - GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs, out_shape_hbm)); +Status AiCpuTask::CopyDataToHbm(vector &outputs, rtStream_t stream) { + GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs)); GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, stream)); GE_CHK_RT_RET(rtStreamSynchronize(stream)); return SUCCESS; } -Status AiCpuTask::UpdateShapeByHbmBuffer(vector &output_desc, const std::vector &out_shape_hbm) { +Status AiCpuTask::UpdateShapeByHbmBuffer(vector &output_desc) { for (size_t i = 0; i < num_outputs_; ++i) { const auto &result_summary = output_summary_host_[i]; std::vector shape_dims; - const auto &shape_hbm = out_shape_hbm[i]; - - uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); - std::unique_ptr 
shape_addr(new (std::nothrow) int64_t[dim_num]()); - GE_CHECK_NOTNULL(shape_addr); - GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm, result_summary.shape_data_size, - RT_MEMCPY_DEVICE_TO_HOST)); - - for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) { - shape_dims.emplace_back(shape_addr[dim_idx]); - GELOGD("Node [%zu]th output dim[%u]=%ld.", i, dim_idx, shape_addr[dim_idx]); + if (result_summary.shape_data_size > 0) { + const auto &shape_hbm = out_shape_hbm_[i]; + + uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); + std::unique_ptr shape_addr(new (std::nothrow) int64_t[dim_num]()); + GE_CHECK_NOTNULL(shape_addr); + GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm, + result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST)); + + for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) { + shape_dims.emplace_back(shape_addr[dim_idx]); + GELOGD("Node [%zu]th output dim[%u]=%ld.", i, dim_idx, shape_addr[dim_idx]); + } } GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), output_desc[i]), @@ -487,7 +473,7 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector &output_desc, cons return SUCCESS; } -Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector &output_desc, vector &outputs, +Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector &output_desc, vector &outputs, rtStream_t stream) { if (num_outputs_ == 0) { GELOGI("Output num is 0, there is no need to update the output and size."); @@ -496,13 +482,20 @@ Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector &output GELOGI("Update shape and data by result summary begin."); - std::vector out_shape_hbm; - GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(out_shape_hbm), - "Read ResultSummary and update output shape failed."); + for (auto out_shape : out_shape_hbm_) { + FreeHbm(out_shape); + } + out_shape_hbm_.clear(); + GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(), "Read ResultSummary and 
update output shape failed."); + + GE_CHK_STATUS_RET(CopyDataToHbm(outputs, stream), "Copy data to output failed."); - GE_CHK_STATUS_RET(CopyDataToHbm(outputs, out_shape_hbm, stream), "Copy data to output failed."); + GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc), "Update shape by hbm buffer failed."); - GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc, out_shape_hbm), "Update shape by hbm buffer failed."); + for (auto out_shape : out_shape_hbm_) { + FreeHbm(out_shape); + } + out_shape_hbm_.clear(); GELOGI("Update shape and data by result summary end."); return SUCCESS; @@ -603,10 +596,18 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { return SUCCESS; } -Status AiCpuTask::LaunchKernel(const std::vector &input_desc, const std::vector &inputs, - std::vector &output_desc, std::vector &outputs, - rtStream_t stream) { +Status AiCpuTask::LaunchKernel(const std::vector &input_desc, + const std::vector &input_buffers, std::vector &output_desc, + std::vector &output_buffers, rtStream_t stream) { GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc)); + std::vector inputs; + std::vector outputs; + for (auto &buffer : input_buffers) { + inputs.emplace_back(buffer.data); + } + for (auto &buffer : output_buffers) { + outputs.emplace_back(buffer.data); + } GE_CHK_STATUS_RET_NOLOG(SetIO(inputs, outputs)); GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); GE_CHK_RT_RET(rtStreamSynchronize(stream)); @@ -614,7 +615,7 @@ Status AiCpuTask::LaunchKernel(const std::vector &input_desc, cons if (unknown_type_ == DEPEND_SHAPE_RANGE) { GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); } else if (unknown_type_ == DEPEND_COMPUTE) { - GE_CHK_STATUS_RET_NOLOG(UpdateShapeAndDataByResultSummary(output_desc, outputs, stream)); + GE_CHK_STATUS_RET_NOLOG(UpdateShapeAndDataByResultSummary(output_desc, output_buffers, stream)); } return SUCCESS; @@ -658,9 +659,9 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { return SUCCESS; } -Status 
AiCpuCCTask::LaunchKernel(const std::vector &input_desc, const std::vector &inputs, - std::vector &output_desc, std::vector &outputs, - rtStream_t stream) { +Status AiCpuCCTask::LaunchKernel(const std::vector &input_desc, + const std::vector &input_buffers, std::vector &output_desc, + std::vector &output_buffers, rtStream_t stream) { GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED, "AiCpuCCTask unknown type[%d] is depend compute, it's not supported now.", unknown_type_); @@ -669,11 +670,11 @@ Status AiCpuCCTask::LaunchKernel(const std::vector &input_desc, co size_t arg_index = 0; auto *task_io_addr = reinterpret_cast(io_addr_); GE_CHECK_NOTNULL(task_io_addr); - for (auto &input : inputs) { - task_io_addr[arg_index++] = reinterpret_cast(input); + for (auto &input : input_buffers) { + task_io_addr[arg_index++] = reinterpret_cast(input.data); } - for (auto &output : outputs) { - task_io_addr[arg_index++] = reinterpret_cast(output); + for (auto &output : output_buffers) { + task_io_addr[arg_index++] = reinterpret_cast(output.data); } GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); diff --git a/src/ge/single_op/task/op_task.h b/src/ge/single_op/task/op_task.h index b6ea9114..5f742197 100644 --- a/src/ge/single_op/task/op_task.h +++ b/src/ge/single_op/task/op_task.h @@ -57,8 +57,9 @@ class OpTask { void SetWorkspaceSizes(const vector &workspace_sizes); const OpDescPtr &GetOpdesc() const { return op_desc_; } Status OpenDump(const std::vector &io_addr, rtStream_t stream); - virtual Status LaunchKernel(const std::vector &input_desc, const std::vector &inputs, - std::vector &output_desc, std::vector &outputs, rtStream_t stream) { + virtual Status LaunchKernel(const std::vector &input_desc, const std::vector &input_buffers, + std::vector &output_desc, std::vector &output_buffers, + rtStream_t stream) { return UNSUPPORTED; } @@ -138,8 +139,9 @@ class AiCpuTask : public AiCpuBaseTask { OpTaskType GetOpTaskType() override { return OP_TASK_AICPU; } const void 
*GetIOAddr() const override; - Status LaunchKernel(const std::vector &input_desc, const std::vector &inputs, - std::vector &output_desc, std::vector &outputs, rtStream_t stream) override; + Status LaunchKernel(const std::vector &input_desc, const std::vector &input_buffers, + std::vector &output_desc, std::vector &output_buffers, + rtStream_t stream) override; Status SetMemCopyTask(const domi::KernelExDef &kernel_def); private: @@ -147,14 +149,14 @@ class AiCpuTask : public AiCpuBaseTask { // for copy task. Status InitForSummaryAndCopy(); - Status UpdateShapeAndDataByResultSummary(vector &output_desc, vector &outputs, + Status UpdateShapeAndDataByResultSummary(vector &output_desc, vector &outputs, rtStream_t stream); - Status ReadResultSummaryAndPrepareMemory(std::vector &out_shape_hbm); + Status ReadResultSummaryAndPrepareMemory(); - Status CopyDataToHbm(vector &outputs, const std::vector &out_shape_hbm, rtStream_t stream); - Status PrepareCopyInputs(vector &outputs, const std::vector &out_shape_hbm); + Status CopyDataToHbm(vector &outputs, rtStream_t stream); + Status PrepareCopyInputs(vector &outputs); - Status UpdateShapeByHbmBuffer(vector &output_desc, const std::vector &out_shape_hbm); + Status UpdateShapeByHbmBuffer(vector &output_desc); friend class AiCpuTaskBuilder; void *workspace_addr_ = nullptr; @@ -178,6 +180,8 @@ class AiCpuTask : public AiCpuBaseTask { void *copy_input_data_size_dev_; void *copy_input_src_dev_; void *copy_input_dst_dev_; + + vector out_shape_hbm_; }; class AiCpuCCTask : public AiCpuBaseTask { @@ -197,8 +201,9 @@ class AiCpuCCTask : public AiCpuBaseTask { void SetIoAddr(void *io_addr); size_t GetArgSize() const; - Status LaunchKernel(const std::vector &input_desc, const std::vector &inputs, - std::vector &output_desc, std::vector &outputs, rtStream_t stream) override; + Status LaunchKernel(const std::vector &input_desc, const std::vector &input_buffers, + std::vector &output_desc, std::vector &output_buffers, + rtStream_t stream) 
override; private: friend class AiCpuCCTaskBuilder; diff --git a/third_party/fwkacllib/inc/ops/aipp.h b/third_party/fwkacllib/inc/ops/aipp.h index 0c1d5112..dd01ac5f 100644 --- a/third_party/fwkacllib/inc/ops/aipp.h +++ b/third_party/fwkacllib/inc/ops/aipp.h @@ -25,16 +25,21 @@ namespace ge { /** -*@brief Performs AI pre-processing (AIPP) on images including color space conversion (CSC), -image normalization (by subtracting the mean value or multiplying a factor), image cropping -(by specifying the crop start and cropping the image to the size required by the neural network), and much more. \n +*@brief Performs AI pre-processing (AIPP) on images including color space +conversion (CSC), +image normalization (by subtracting the mean value or multiplying a factor), +image cropping +(by specifying the crop start and cropping the image to the size required by +the neural network), and much more. \n *@par Inputs: -*@li images: An NCHW or NHWC tensor of type uint8, specifying the input to the data layer. +*@li images: An NCHW or NHWC tensor of type uint8, specifying the input to the +data layer. *@li params: Dynamic AIPP configuration parameters of type uint8. \n *@par Attributes: -*aipp_config_path: A required string, specifying the path of the AIPP configuration file. \n +*aipp_config_path: A required string, specifying the path of the AIPP +configuration file. \n *@par Outputs: *features: The AIPP-processed output tensor of type float16 or uint8. diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index 5d68b977..6d865399 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -28,9 +28,10 @@ namespace ge { *@par Inputs: *Dynamic inputs, including: -* @li x: A list of Tensor objects, each with same shape and type. The supported types are: +* @li x: A list of Tensor objects, each with same shape and type. 
The supported +types are: * float16, float32, double, int32, uint8, int16, int8, complex64, int64, -* qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n +* qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n *@par Outputs: *y: A Tensor. Has the same shape and type as the elements of "x". \n @@ -121,7 +122,8 @@ REG_OP(MinimumGrad) *@par Inputs: *One input: -*x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, +*x:A Tensor. Must be one of the following types: bool, float16, float, int8, +int32, uint32, uint8, int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n *@par Attributes: @@ -385,7 +387,8 @@ REG_OP(Sign) *@par Inputs: *Two inputs, including: \n -*@li x1: A Tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64,complex128 +*@li x1: A Tensor. Must be one of the following types: float16, float32, + float64, int32, int64, complex64,complex128 *@li x2: A Tensor. Has the same type as "x1". \n *@par Outputs: @@ -484,12 +487,16 @@ REG_OP(Equal) *@par Inputs: *One input:\n -*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. \n +*x: A Tensor. Must be one of the following types: float16, float32, double, +complex64, complex128. \n *@par Attributes: -*@li base: An optional attribute of type float32, specifying the base gamma. Defaults to "-1.0". -*@li scale: An optional attribute of type float32, specifying the scale alpha. Defaults to "1.0". -*@li shift: An optional attribute of type float32, specifying the shift beta. Defaults to "0.0". \n +*@li base: An optional attribute of type float32, specifying the base gamma. +Defaults to "-1.0". +*@li scale: An optional attribute of type float32, specifying the scale alpha. +Defaults to "1.0". +*@li shift: An optional attribute of type float32, specifying the shift beta. +Defaults to "0.0". 
\n *@par Outputs: *y: A Tensor of the same type as "x". \n @@ -510,7 +517,8 @@ REG_OP(Exp) *@par Inputs: *One input: -*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. \n +*x: A Tensor. Must be one of the following types: float16, float32, double, +complex64, complex128. \n *@par Outputs: *y: A Tensor of the same type as "x". \n @@ -527,7 +535,9 @@ REG_OP(Expm1) *@brief: Computes the reciprocal of "x". \n *@par Inputs:\n -*x: A Tensor. Must be one of the following types: float16, float32, int32, int64, double, complex64, complex128. \n +*x: A Tensor. Must be one of the following types: float16, float32, +int32, int64, double, +complex64, complex128. \n *@par Outputs: *y: A Tensor. Has the same type as "x". \n @@ -749,7 +759,8 @@ REG_OP(Xlogy) *@par Inputs: *One input: \n -*x: A Tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128 +*x: A Tensor. Must be one of the following types: float16, float32, float64, +int32, int64, complex64, complex128 *@par Outputs: *y: A Tensor. Has the same type as "x". \n @@ -790,7 +801,8 @@ REG_OP(Rsqrt) * *@par Inputs: -* x: A tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128. +* x: A tensor. Must be one of the following types: float16, float32, float64, +int32, int64, complex64, complex128. * *@par Outputs: * y: A tensor. Has the same type as "x". @@ -811,7 +823,8 @@ REG_OP(Asin) * *@par Inputs: -*@li y: A tensor of type float16, float32, float64, int32, int64, complex64, complex128. +*@li y: A tensor of type float16, float32, float64, +int32, int64, complex64, complex128. *@li dy: A tensor of the same type as "y". * *@attention Constraints: @@ -838,7 +851,8 @@ REG_OP(AsinGrad) * *@par Inputs: -* x: A tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128. +* x: A tensor. 
Must be one of the following types: float16, float32, float64, +int32, int64, complex64, complex128. * *@par Outputs: * y: A tensor. Has the same type as "x". @@ -883,7 +897,8 @@ REG_OP(AcosGrad) * *@par Inputs: -* x: A tensor. Must be one of the following types: float16, float32, float64, complex64, complex128. +* x: A tensor. Must be one of the following types: float16, float32, float64, + complex64, complex128. * *@attention Constraints: * x Given an input tensor, the function computes inverse hyperbolic cosine of every element.\n @@ -1160,7 +1175,8 @@ REG_OP(FusedMulAdd) * *@par Inputs: -*@li x1: A tensor. Must be one of the following types: float16, float32, float64, uint8, int8, int16, int32, int64, complex64, complex128. +*@li x1: A tensor. Must be one of the following types: float16, float32, float64, +uint8, int8, int16, int32, int64, complex64, complex128. *@li x2: A tensor of the same type as "x1". * *@attention Constraints: @@ -1189,7 +1205,8 @@ REG_OP(AddV2) *@brief Updates "ref" by adding "value" to it. \n *@par Inputs: -*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. +*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, +int16, int32, int64, uint8, uint16, uint32, uint64. *@li value: A Tensor of the same type as "ref". \n *@par Attributes: @@ -1218,12 +1235,14 @@ REG_OP(AssignAdd) *@brief Updates "ref" by assigning "value" to it. \n *@par Inputs: -*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. +*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, +int32, int64, uint8, uint16, uint32, uint64. *@li value: A Tensor of the same type as "ref". \n *@par Attributes: *@li validate_shape: An optional bool. Defaults to "true". 
- If "true", the operation will validate that the shape of "value" matches the shape of the Tensor being assigned to. + If "true", the operation will validate that the shape of "value" + matches the shape of the Tensor being assigned to. * If "false", "ref" will take on the shape of "value". * This attribute is reserved. *@li use_locking: An optional bool. Defaults to True. @@ -1252,7 +1271,8 @@ REG_OP(Assign) * *@par Inputs: -*@li var: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, uint32, uint64 +*@li var: A tensor. Must be one of the following types: float32, float64, +int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, uint32, uint64 *@li value: A tensor of the same type as "var". * *@par Attributes: @@ -1644,7 +1664,9 @@ REG_OP(Atan2) * *@par Inputs: -*@li x1: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64 +*@li x1: A tensor. Must be one of the following types: float32, float64, int32, + uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, +float16, uint32, uint64 *@li x2: A tensor of the same type as "x1". * *@par Attributes: @@ -1666,16 +1688,18 @@ REG_OP(ApproximateEqual) /** *@brief Returns the element-wise sum of a list of tensors.\n -* AccumulateNV2 performs the same operation as AddN, but does not wait for all of its inputs -to be ready before beginning to sum.\n This can save memory if inputs are ready at different times, -since minimum temporary storage is proportional to the output size rather than the inputs size. - Returns a Tensor of same shape and type as the elements of inputs. 
\n +* AccumulateNV2 performs the same operation as AddN, but does not wait for all +of its inputs to be ready before beginning to sum.\n This can save memory if +inputs are ready at different times, \n since minimum temporary storage is +proportional to the output size rather than the inputs size.\n Returns a Tensor +of same shape and type as the elements of inputs. \n * *@par Inputs: *Dynamic inputs, including: -* x: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, -qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. It's a dynamic input. \n +* x: A tensor. Must be one of the following types: float32, float64, int32, +uint8, int16, int8, complex64, int64, \n qint8, quint8, qint32, uint16, +complex128, float16, uint32, uint64. * *@par Outputs: * y: A tensor. Has the same type as "x". @@ -1731,7 +1755,8 @@ REG_OP(FakeQuantWithMinMaxArgs) *@par Inputs: *Two inputs, including: \n -*@li gradients: A Tensor of type float32. Backpropagated gradients above the FakeQuantWithMinMaxArgs operation. +*@li gradients: A Tensor of type float32. Backpropagated gradients +above the FakeQuantWithMinMaxArgs operation. *@li x: A Tensor of type float32. Has the same type and format as "gradients".\n * This is the input Tensor of the FakeQuantWithMinMaxArgs operator.\n @@ -2210,9 +2235,13 @@ REG_OP(BiasAdd) *@par Inputs: *Two inputs, including: -*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, complex128, float16, uint32, uint64. +*@li x: A Tensor. Must be one of the following types: float32, float64, int32, +uint8, int16, int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, +complex128, float16, uint32, uint64. *format is ND. -*@li dimension: A Tensor. Must be one of the following types: int32, int64. Must be in the range [-rank(input x), rank(input x)]. 
Describes which dimension of the input Tensor to reduce across. +*@li dimension: A Tensor. Must be one of the following types: int32, int64. +Must be in the range [-rank(input x), rank(input x)]. Describes which dimension +of the input Tensor to reduce across. * The format is ND. *@par Attributes: *dtype: The output type, either "int32" or "int64". Defaults to "int64". \n @@ -2286,6 +2315,7 @@ REG_OP(ArgMaxV2) .ATTR(dtype, Type, DT_INT64) .OP_END_FACTORY_REG(ArgMaxV2) + /** *@brief Returns the index with the largest value across axes of a tensor. \n @@ -2298,15 +2328,16 @@ REG_OP(ArgMaxV2) *@li dtype: The output type, either "int32" or "int64". Defaults to "int64". \n *@par Outputs: -*y: A multi-dimensional Tensor of type int32, specifying the index with the largest value. The dimension is one less than that of "x". \n +*y: A multi-dimensional Tensor of type int32, specifying the index with the +largest value. The dimension is one less than that of "x". \n *@attention Constraints: *@li x: If there are multiple maximum values, the index of the first maximum value is used. -*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". \n +*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the +dimension length of "x". \n *@par Third-party framework compatibility * Compatible with TensorFlow operator ArgMax. -* * @par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ @@ -2929,9 +2960,13 @@ REG_OP(FusedMulAddN) *@li bias: An ND tensor of type float16 or float32. \n *@par Attributes: -*@li axis: An optional int32 used to compute the shape of bias input from the online bottoms. Defaults to "1". -*@li num_axes: An optional int32 used to compute the shape of bias input from a Caffe model trained offline. Defaults to "1". -*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe model trained offline. If "false", bias is input from online bottoms. Defaults to "true". 
\n +*@li axis: An optional int32 used to compute the shape of bias input from the +online bottoms. Defaults to "1". +*@li num_axes: An optional int32 used to compute the shape of bias input from a +Caffe model trained offline. Defaults to "1". +*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe +model trained offline. If "false", bias is input from online bottoms. Defaults +to "true". \n *@par Outputs: *y: An ND tensor of type float16 or float32. \n @@ -2939,13 +2974,25 @@ REG_OP(FusedMulAddN) *@attention Constraints:\n * Assume that the shape length of "x" is "n" and that of "bias" is "m". *@li "axis" is within the range [-n, n-1]. num_axes >= -1. -*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis).\n -* If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < -axis). -*@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with shape length 1 and dimension size 1. -*@li If "bias_from_blob = true", "num_axes > 0, and "axis >= 0", "axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).\n -* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < num_axes). -*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0","axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < m).\n -* If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m). 
+*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis +of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < +n-axis).\n +* If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must +have the same size (0 <= i < -axis). +*@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with +shape length 1 and dimension size 1. +*@li If "bias_from_blob = true", "num_axes > 0, and "axis >= 0", "axis + +num_axes" must be less than or equal to "n" and the ith axis of "bias" and the +(i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).\n +* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and +the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same +size (0 <= i < num_axes). +*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0","axis ++ m" must be less than or equal to "n" and the ith axis of "bias" and the (i ++"axis")th axis of "x" must have the same size (0 <= i < m).\n +* If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith +axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= +i < m). *@par Third-party framework compatibility * Compatible with the Caffe operator Bias. */ @@ -3023,10 +3070,12 @@ REG_OP(FusedMulAddNL2loss) *@li x: A Tensor with any format. Must be one of the following types: float16, float32. \n *@par Attributes: -*@li threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n +*@li threshold: A required float32. Defaults to "0.0". "x" is compared with +"threshold", outputs "1" for inputs above threshold; "0" otherwise. \n *@par Outputs: -*@li y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32. +*@li y: A Tensor with any format. Has the same type as the input. 
Must be one +of the following types: float16, float32. *@par Third-party framework compatibility * Compatible with the Caffe operator Threshold. */ @@ -3044,11 +3093,16 @@ REG_OP(FusedMulAddNL2loss) *@li x: A tensor. Must be one of the following types: float16, float32. \n *@par Attributes: -*@li axis: An optional int. Specify the axis to be cut at the input tensor. If this parameter is not provided, find the topk for each batch. Defaults to 10000 -*@li out_max_val: An optional bool. Whether to output the maximum value. If it is True, the maximum value and index are output, otherwise only the index is output. +*@li axis: An optional int. Specify the axis to be cut at the input tensor. If +this parameter is not provided, find the topk for each batch. Defaults to 10000 +*@li out_max_val: An optional bool. Whether to output the maximum value. If it +is True, the maximum value and index are output, otherwise only the index is +output. * Defaults to False -*@li topk: An optional int. It means the number of top tok in each axis (the value is greater than or equal to 1), and the value range must be in [1,x.shape(axis)]. -* Defaults to 1 +*@li topk: An optional int. It means the number of top tok in each axis (the +value is greater than or equal to 1), and the value range must be in [1,x.shape +(axis)]. +* Defaults to 1 \n *@par Outputs: *@li indices: A tensor of type float16, float32, int32. The index of the maximum value of the output. @@ -3168,7 +3222,8 @@ REG_OP(Axpy) .OP_END_FACTORY_REG(Axpy) /** -*@brief Creates a criterion that measures the loss given input tensors x1 x2 and a Tensor label y with values 1 or -1. \n +*@brief Creates a criterion that measures the loss given input tensors x1 x2 +and a Tensor label y with values 1 or -1. \n *@par Inputs: *@li x1: A ND Tensor with one of the following types: int8, uint8, int32, float16, float32. 
diff --git a/third_party/fwkacllib/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/ops/functional_ops.h index 1e67c41f..bf5ebd51 100644 --- a/third_party/fwkacllib/inc/ops/functional_ops.h +++ b/third_party/fwkacllib/inc/ops/functional_ops.h @@ -36,7 +36,7 @@ namespace ge { * if "cond" is a numerical scalar, non-zero means True and zero means False; * if "cond" is a string scalar, non-empty means True and empty means False; * if "cond" is not a scalar, non-empty means True and empty means False. - *@li input: The input tensors . It's a dynamic input. \n + *@li input: The input tensors . \n *@par Graphs: *@li then_branch: A subgraph takes 'input' and returns a list of tensors, @@ -69,7 +69,7 @@ REG_OP(_If) * if "cond" is a numerical scalar, non-zero means True and zero means False; * if "cond" is a string scalar, non-empty means True and empty means False; * if "cond" is not a scalar, non-empty means True and empty means False. - *@li input: The input tensors . It's a dynamic input. \n + *@li input: The input tensors . \n *@par Graphs: *@li then_branch: A subgraph takes 'input' and returns a list of tensors, @@ -102,7 +102,7 @@ REG_OP(StatelessIf) * if "cond" is a numerical scalar, non-zero means True and zero means False; * if "cond" is a string scalar, non-empty means True and empty means False; * if "cond" is not a scalar, non-empty means True and empty means False. - *@li input: The input tensors . It's a dynamic input. \n + *@li input: The input tensors . \n *@par Graphs: *@li then_branch: A subgraph takes 'input' and returns a list of tensors, @@ -129,7 +129,7 @@ REG_OP(If) *@par Inputs: *@li branch_index: A int32 scalar which determines the selected subgraph. - *@li input: The input tensors, which will be passed to the subgraph . It's a dynamic input. \n + *@li input: The input tensors, which will be passed to the subgraph . 
\n *@par Graphs: *branches: A list of subgraphs, each of which takes 'input' and returns a list of tensors, @@ -152,7 +152,7 @@ REG_OP(Case) *@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False . \n *@par Inputs: - *input: The input tensors . It's a dynamic input. \n + *input: The input tensors . \n *@par Graphs: *@li cond: A subgraph takes 'input' and returns a tensor. @@ -183,7 +183,7 @@ REG_OP(_While) *@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False . \n *@par Inputs: - *input: The input tensors . It's a dynamic input. \n + *input: The input tensors . \n *@par Graphs: *@li cond: A subgraph takes 'input' and returns a tensor. @@ -215,7 +215,7 @@ REG_OP(While) *@brief Cyclic execute the "body" subgraph until the return tensor of "cond" subgraph means False . \n *@par Inputs: - *input: The input tensors . It's a dynamic input. \n + *input: The input tensors . \n *@par Graphs: *@li cond: A subgraph takes 'input' and returns a tensor. @@ -250,7 +250,7 @@ REG_OP(StatelessWhile) *@li start: A int32 scalar. The lower bound. *@li limit: A int32 scalar. The upper bound. *@li delta: A int32 scalar. The step size. - *@li input: The input tensors, which will be passed to "body" . It's a dynamic input. \n + *@li input: The input tensors, which will be passed to "body" . \n *@par Graphs: *body: A subgraph takes 'input' and returns a another list of tensors . \n @@ -274,7 +274,7 @@ REG_OP(For) *@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n *@par Inputs: - *args: The input tensors, which will be passed to "f" . It's a dynamic input. \n + *args: The input tensors, which will be passed to "f" . \n *@par Graphs: *f: A subgraph takes 'args' and returns a another list of tensors . \n @@ -303,7 +303,7 @@ REG_OP(PartitionedCall) *@brief Pass the input tensors to the subgraph "f" and return the output tensors . 
\n *@par Inputs: - *args: The input tensors, which will be passed to "f" . It's a dynamic input. \n + *args: The input tensors, which will be passed to "f" . \n *@par Graphs: *f: A subgraph takes 'args' and returns a another list of tensors . \n diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index 27fb79a9..302823a2 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -160,8 +160,10 @@ REG_OP(CropAndResize) *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch) . \n *@par Attributes: -*@li crop_size: list int. [crop_height, crop_width]. All cropped image patches are resized to this size. -*@li extrapolation_value: An optional float. Defaults to 0. Value used for extrapolation, when applicable. +*@li crop_size: list int. [crop_height, crop_width]. All cropped image patches +are resized to this size. +*@li extrapolation_value: An optional float. Defaults to 0. Value used for +extrapolation, when applicable. *@li method: An optional string from: '"bilinear"'. Defaults to "bilinear" . \n *@par Outputs: @@ -172,7 +174,6 @@ REG_OP(CropAndResize) *@par Third-party framework compatibility *Compatible with tensorflow CropAndResize operator. - * @par Restrictions: * Warning: THIS FUNCTION IS DEPRECATED. Please use CropAndResize instead. */ diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h index 4fa85cbc..073d541d 100644 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -87,39 +87,58 @@ REG_OP(L2NormalizeGrad) *@par Inputs: * Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) -*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. -*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. 
Must be 5D +*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW +for 4D or NC1HWC0 for 5D. +*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format +NHWC or NCHW. Must be 5D if input "x" is with format NC1HWC0. Specifies the scaling factor. *@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D if input "x" is with format NC1HWC0. Specifies the offset. -*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D -if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the +*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format +NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the mean used for inference. +Must be "None" if the operation is used for training. -*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be -5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None" +*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format +NHWC or NCHW. Must be +5D if input "x" is with format NC1HWC0. Specifies the variance used for +inference. Must be "None" if the operation is used for training . \n *@par Attributes: -*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". -*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". -*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n +*@li epsilon: An optional float32, specifying the small value added to variance +to avoid dividing by zero. Defaults to "0.0001". +*@li data_format: An optional string, specifying the format of "x". Defaults to +"NHWC". 
+*@li is_training: An optional bool, specifying if the operation is used for +training or inference. Defaults to "True" . \n *@par Outputs: * Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported) -*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D. -*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", +with format NHWC or NCHW for 4D or NC1HWC0 for 5D. +*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with +format NHWC or NCHW. Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x". -*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with +format NHWC or NCHW. Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". -*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. -Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. -*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. -Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n +*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input +"x" is with format NHWC or NCHW. +Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for +gradient computation. Pass "None" to skip this output. +*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input +"x" is with format NHWC or NCHW. +Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" +for gradient computation. 
Pass "None" to skip this output . \n *@attention Constraints: -*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, -then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". -*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n +*@li If the operation is used for inference and outputs "reserve_space_1" and +"reserve_space_2" are available, +then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has +the same value as "variance". +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square +root instruction . \n *@par Third-party framework compatibility *@li Compatible with the TensorFlow operator fused_batch_norm. @@ -166,13 +185,17 @@ is used for training or inference. Defaults to "True" . \n *@li y: A 4D Tensor of type float16 or float32, for the normalized "x". *@li batch_mean: A 1D Tensor of type float32, for the mean of "x". *@li batch_variance: A 1D Tensor of type float32, for the variance of "x". -*@li reserve_space_1: A 1D Tensor of type float32, for the mean of "x" for gradient computation. -*@li reserve_space_2: A 1D Tensor of type float32, for the variance of "x" for gradient computation . \n +*@li reserve_space_1: A 1D Tensor of type float32, for the mean of "x" for +gradient computation. +*@li reserve_space_2: A 1D Tensor of type float32, for the variance of "x" +for gradient computation . \n *@attention Constraints: *@li If the operation is used for inference, then output "reserve_space_1" -has the same value as "mean" and output "reserve_space_2" has the same value as "variance". -*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n +has the same value as "mean" and output "reserve_space_2" has the same value as +"variance". 
+*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square +root instruction . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator fused_batch_norm_v2. @@ -198,23 +221,34 @@ REG_OP(BatchNormExt2) *@par Inputs: * Five inputs, including: -*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the gradient. -*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0. -*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. -*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm. -*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . \n +*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format +NHWC, NCHW, or NC1HWC0, for the gradient. +*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, +or NC1HWC0. +*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or +NC1HWC0. +*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, +NCHW, or NC1HWC0. It is an output of BatchNorm. +*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, +NCHW, or NC1HWC0. It is an output of BatchNorm . \n *@par Attributes: -*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". +*@li epsilon: An optional float32. Defaults to "0.0001". A small float number +added to the variance of "x". *@li data_format: An optional string. Defaults to "NHWC". *@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n *@par Outputs: -*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "x". 
-*@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "scale". -*@li *offset_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "offset". -*@li *reserve_space_4: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output. -*@li *reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output . \n +*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, +or NC1HWC0, for the offset of "x". +*@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, or +NC1HWC0, for the offset of "scale". +*@li *offset_backprop: A Tensor of type float32, with format NHWC, NCHW, or +NC1HWC0, for the offset of "offset". +*@li *reserve_space_4: A Tensor of type float32, with shape NHWC, NCHW, or +NC1HWC0. Pass "None" to skip this output. +*@li *reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW, or +NC1HWC0. Pass "None" to skip this output . \n *@attention Constraints: * The preceding layer of this operator must be operator BatchNorm . \n @@ -244,21 +278,28 @@ REG_OP(BatchNormGrad) *@par Inputs: * Five inputs, including: -*@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient. +*@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or +NCHW, for the gradient. *@li x: A 4D Tensor of type float16 or float32, with format NHWC or NCHW. *@li scale: A 4D Tensor of type float32, with format NHWC or NCHW. -*@li reserve_space_1: A 4D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNormExt2. -*@li reserve_space_2: A 4D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNormExt2 . \n +*@li reserve_space_1: A 4D Tensor of type float32, with format NHWC or NCHW. It +is an output of BatchNormExt2. +*@li reserve_space_2: A 4D Tensor of type float32, with format NHWC or NCHW. 
It +is an output of BatchNormExt2 . \n *@par Attributes: *@li epsilon: A required float32. A small float number added to the variance of "x". *@li data_format: A required string for the format. -*@li is_training: A required bool for specifying the operation is for training (true) or inference (false) . \n +*@li is_training: A required bool for specifying the operation is for training +(true) or inference (false) . \n *@par Outputs: -*@li x_backprop: A Tensor of type float16 or float32, with format NHWC or NCHW, for the offset of "x". -*@li scale_backprop: A Tensor of type float32, with format NHWC or NCHW, for the offset of "scale". -*@li offset_backprop: A Tensor of type float32, with format NHWC or NCHW, for the offset of "offset". +*@li x_backprop: A Tensor of type float16 or float32, with format NHWC or NCHW, +for the offset of "x". +*@li scale_backprop: A Tensor of type float32, with format NHWC or NCHW, for +the offset of "scale". +*@li offset_backprop: A Tensor of type float32, with format NHWC or NCHW, for +the offset of "offset". *@li reserve_space_3: A Tensor of type float32, with format NHWC or NCHW. *@li reserve_space_4: A Tensor of type float32, with format NHWC or NCHW . \n @@ -290,14 +331,18 @@ REG_OP(BatchNormGradExt2) *@brief Performs batch normalization . \n *@par Inputs: -*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. -*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. -*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. +*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW +for 4D or NC1HWC0 for 5D. +*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" +Specifies the mean used for inference. +*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" +Specifies the variance used for inference. 
*@li momentum: A Tensor,represents the mean and the variance's scale factor *@li scale: An optional tensor of type float16 or float32, no use *@li offset: An optional tensor of type float16 or float32, no use *@par Attributes: -*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". +*@li epsilon: An optional float32, specifying the small value added to variance +to avoid dividing by zero. Defaults to "0.00001". *@li use_global_stats: mean inference mode , only can be "True". *@li mode: An optional input, not use *@par Outputs: @@ -315,16 +360,20 @@ REG_OP(BNInference) .ATTR(use_global_stats, Bool,true) .ATTR(mode, Int,1) .OP_END_FACTORY_REG(BNInference) + /** *@brief aicpu batch normalization host . \n *@par Inputs: -*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. -*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. +*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" +Specifies the mean used for inference. +*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" +Specifies the variance used for inference. *@li momentum: An optional float, mean and variance's Scale factor *@par Attributes: -*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". +*@li epsilon: An optional float32, specifying the small value added to variance +to avoid dividing by zero. Defaults to "0.00001". *@li use_global_stats: mean inference mode , only can be "True". *@li mode: An optional attr, not use *@par Outputs: @@ -348,14 +397,19 @@ REG_OP(BnHost) *@brief Performs batch normalization . \n *@par Inputs: -*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. -*@li mean: A Tensor of type float32 or float16. 
Must be 1D if input "x" Specifies the mean used for inference. -*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. +*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW +for 4D or NC1HWC0 for 5D. +*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" +Specifies the mean used for inference. +*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" +Specifies the variance used for inference. *@li scale: An optional tensor of type float16 or float32, no use *@li offset: An optional tensor of type float16 or float32, no use *@par Attributes: -*@li momentum: An optional float32 num, represents the mean and the variance's scale factor -*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". +*@li momentum: An optional float32 num, represents the mean and the variance's +scale factor +*@li epsilon: An optional float32, specifying the small value added to variance +to avoid dividing by zero. Defaults to "0.00001". *@li use_global_stats: mean inference mode , only can be "True". *@li mode: An optional attr, not use *@par Outputs: diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index 12412516..6307889d 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -310,9 +310,6 @@ REG_OP(DepthwiseConv2DBackpropInputD) * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator DepthwiseConv2D. * @li Compatible with the Caffe operator DepthwiseConv2D. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
*/ REG_OP(DepthwiseConv2D) .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) @@ -460,9 +457,9 @@ REG_OP(Conv2DBackpropInputD) *@par Attributes: * Six attributes: * @li strides: A tuple or list of 2 integers. The stride of the sliding window - * for H/W dimension. + * for H/W dimension, defaults to [1,1]. * @li pads: A tuple or list of 4 integers. The [top, bottom, left, right] - * padding on the feature map. + * padding on the feature map, defaults to [0,0,0,0]. * @li dilations: A tuple or list of 4 integers. The dilation factor for each * dimension of input, defaults to [1,1,1,1]. * @li groups: Number of blocked connections from input channels to @@ -482,8 +479,8 @@ REG_OP(Deconvolution) .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) - .REQUIRED_ATTR(strides, ListInt) - .REQUIRED_ATTR(pads, ListInt) + .ATTR(strides, ListInt, {1, 1}) + .ATTR(pads, ListInt, {0, 0, 0, 0}) .ATTR(dilations, ListInt, {1, 1, 1, 1}) .ATTR(groups, Int, 1) .ATTR(data_format, String, "NCHW") @@ -593,7 +590,7 @@ REG_OP(Conv2DBackpropFilterD) *@li bias: An optional 1D tensor. Shape is [out_channels]. *@li offset_w: An optional 1D tensor for quantized convolution. Shape is -* [out_channels]. Reserved. +* [out_channels]. Not supported. *\n *\n * Note that there is a strict data type mapping between the input and output @@ -622,7 +619,8 @@ REG_OP(Conv2DBackpropFilterD) * and right padding. * @li dilations: Optional. A list of 4 integers. Specifying the dilation rate * to use for dilated convolution. Has the same dimension order and value as -* "strides". Defaults to [1, 1, 1, 1]. +* "strides". Dilation > 1 is not supported for quantized convolution. Defaults +* to [1, 1, 1, 1]. * @li groups: Optional. An integer of type int32, for the number of blocked * connections from input channels to output channels. Input channels and output * channels must both be divisible by "groups". 
"x" in_channels must be equal to @@ -704,13 +702,62 @@ REG_OP(Conv2D) .ATTR(offset_x, Int, 0) .OP_END_FACTORY_REG(Conv2D) +/** +*@brief Computes a 2D convolution given 4D "x" and "filter_compress" tensors. +*@par Inputs: +* @li x: A 4D tensor of input images. +* @li filter_compress: A 4D tensor of compressed filters. +* @li compress_index: A 1D Tensor dtype of int8. +* @li bias: An optional 1D tensor. +* @li offset_w: An optional 1D tensor for quantized convolution. Reserved. +* +* The input and output tensor attributes are listed as follows: +* @verbatim + |Tensor | x | filter_compress | bias | offset_w | y + -----------|---------|---------|---------|----------|-------- + |Data Type | float16 | float16 | float16 | _ | float16 + | |---------|---------|---------|----------|-------- + | | float32 | float32 | float32 | _ | float32 + | |---------|---------|---------|----------|-------- + | | int8 | int8 | int32 | int8 | int32 + -----------|---------|---------|---------|----------|-------- + |Format | NCHW | NCHW | ND | ND | NCHW + | | NHWC | NHWC | | | NHWC + | | | HWCN | | | +@endverbatim +* It should be noted that the data types must correspond to each other, but the +* format does not need to . \n + +*@par Attributes: +* @li strides: A list of 4 integers. Specifying the strides of the +* convolution along the height and width. The dimension order is determined +* by the data format of "x". By default the N and C dimensions are set to 1. +* @li pads: A list of 4 integers. Specifying the top, bottom, left and right +* padding. +* @li dilations: A list of 4 integers. Specifying the dilation rate to use +* for dilated convolution. Has the same dimension order and value as "strides". +* @li groups: Number of blocked connections from input channels to output +* channels. Input channels and output channels must both be divisible by +* "groups".Type is int32. +* @li offset_x: An optional integer for quantized convolution. Type is int32. +* Defaults to "0". 
+* @li data_format: An optional string from: "NHWC", "NCHW". Specifying the +* data format of the input and output images. Type is string. +* Defaults to "NHWC". Reserved . \n + +*@par Outputs: +* @li y: A 4D Tensor of output images . \n + +*@par Restrictions: +*Warning: THIS FUNCTION IS DEPRECATED. +*/ REG_OP(Conv2DCompress) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) - .INPUT(filter_compress, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) + .INPUT(filter_compress, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) .INPUT(compress_index, TensorType({DT_INT8})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index 415cc4ef..bd8bb9bf 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -158,18 +158,25 @@ REG_OP(Iou) *@par Inputs: * Three inputs, including: *@li ydiff: A 5HD gradient input of type float32. -*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" indicates the number of ROIs, -the value "5" indicates the indexes of images where the ROIs are located, "x0", "x1", "y0", and "y1". -*@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved . \n +*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" +indicates the number of ROIs, +the value "5" indicates the indexes of images where the ROIs are located, "x0", +"x1", "y0", and "y1". 
+*@li rois_n: An optional input, specifying the number of valid ROIs. This +parameter is reserved . \n *@par Attributes: *@li xdiff_shape: A required list of 4 ints, obtained based on the shape of "features" of ROIAlign. *@li pooled_width: A required attribute of type int, specifying the W dimension. *@li pooled_height: A required attribute of type int, specifying the H dimension. -*@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image. -*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical -sampling frequency of each output. If this attribute is set to "0", the sampling frequency is -equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . \n +*@li spatial_scale: A required attribute of type float, specifying the scaling +ratio of "features" to the original image. +*@li sample_num: An optional attribute of type int, specifying the horizontal +and vertical +sampling frequency of each output. If this attribute is set to "0", the +sampling frequency is +equal to the rounded up value of "rois", which is a floating point number. +Defaults to "2" . \n *@par Outputs: *xdiff: Gradient added to input "features". Has the same 5HD shape as input "features". @@ -876,9 +883,7 @@ REG_OP(YoloV3DetectionOutputV2) A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. *@li imginfo: A float16, describing the image information including the required image height and width and the actual image height and width. -*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. -[[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] -is formed for the three Yolo outputs, respectively .It's a dynamic input. \n +*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. 
[[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed for the three Yolo outputs, respectively . \n *@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 14949c54..0d0032cf 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -896,29 +896,7 @@ REG_OP(InstanceNormV2) .ATTR(epsilon, Float, 0.00001) .OP_END_FACTORY_REG(InstanceNormV2) -/** -*@brief Performs instance normalization for inference. - -*@par Inputs:\n -* Five inputs, including: (NC1HWC0 supported) -*@li x: A Tensor of type float16 or float32. -*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma. -*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta. -*@li mean: A [N, C1, 1, 1, C0] ensor of type float32, for the mean. -*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance. -*@li variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt. - -*@par Outputs:\n -*y: A Tensor of type float16 or float32 for the normalized "x". -*batch_mean: A Tensor of type float32 for the result mean. -*batch_ variance: A Tensor of type float32 for the result variance. -*@attention Constraints: -*For Ascend 310, the result accuracy fails to reach 1<89> due to the square root instruction. - -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use INInferV2 instead. 
-*/ REG_OP(INInferV2D) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) .OPTIONAL_INPUT(gamma, TensorType({DT_FLOAT})) @@ -931,6 +909,20 @@ REG_OP(INInferV2D) .OUTPUT(batch_variance, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(INInferV2D) +/** +*@brief Performs instance normalization for inference of InHost part. + +*@par Inputs:\n +* One input, including: (NC1HWC0 supported) +* variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance. + +*@par Attributes: +* epsilon: An optional float32, specifying the small value added to +variance to avoid dividing by zero. Defaults to "0.00001" . \n + +*@par Outputs:\n +* variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt. +*/ REG_OP(InHost) .INPUT(variance, TensorType({DT_FLOAT})) .OUTPUT(variance_sqrt, TensorType({DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index 5d3cd931..fb7fc127 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -128,9 +128,6 @@ REG_OP(AvgPool) *@par Third-party framework compatibility * Compatible with the TensorFlow operator AvgPool3D. -* -* @par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(AvgPool3D) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 65fb462e..0621a96c 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -111,9 +111,6 @@ REG_OP(ApplyAdaMax) * *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyAdaMax. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdaMax instead. */ REG_OP(ApplyAdaMaxD) .INPUT(var, TensorType::NumberType()) @@ -352,9 +349,6 @@ REG_OP(ApplyMomentum) * accum: A mutable tensor. 
Has the same type as input "accum". *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyMomentum. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyMomentum instead. */ REG_OP(ApplyMomentumD) @@ -681,9 +675,6 @@ REG_OP(ApplyPowerSign) * *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyPowerSign. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyPowerSign instead. */ REG_OP(ApplyPowerSignD) .INPUT(var, TensorType::NumberType()) @@ -804,9 +795,6 @@ REG_OP(ApplyAddSign) *@par Third-party framework compatibility * Compatible with the TensorFlow operator ApplyAddSign. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAddSign instead. */ REG_OP(ApplyAddSignD) .INPUT(var, TensorType::NumberType()) @@ -928,9 +916,6 @@ REG_OP(ApplyCenteredRMSProp) *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyCenteredRMSPropD. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyCenteredRMSProp instead. */ REG_OP(ApplyCenteredRMSPropD) .INPUT(var, TensorType::NumberType()) @@ -1049,9 +1034,6 @@ REG_OP(ApplyAdagrad) * *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyAdagrad. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdagrad instead. */ REG_OP(ApplyAdagradD) .INPUT(var, TensorType::NumberType()) @@ -1236,9 +1218,6 @@ REG_OP(ApplyAdagradDA) *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyAdagradDA. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdagradDA instead. */ REG_OP(ApplyAdagradDAD) .INPUT(var, TensorType::NumberType()) @@ -1496,9 +1475,6 @@ REG_OP(ApplyProximalAdagrad) *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyProximalAdagradD. 
-* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyProximalAdagrad instead. */ REG_OP(ApplyProximalAdagradD) .INPUT(var, TensorType::NumberType()) @@ -1592,9 +1568,6 @@ REG_OP(SparseApplyProximalAdagrad) *@par Third-party framework compatibility *Compatible with the TensorFlow operator SparseApplyProximalAdagrad. - -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyProximalAdagrad instead. */ REG_OP(SparseApplyProximalAdagradD) .INPUT(var, TensorType::NumberType()) @@ -1681,9 +1654,6 @@ REG_OP(ApplyFtrl) *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyFtrl. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyFtrl instead. */ REG_OP(ApplyFtrlD) .INPUT(var, TensorType::NumberType()) @@ -1775,9 +1745,6 @@ REG_OP(ApplyFtrlV2) *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyFtrlV2. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyFtrlV2 instead. */ REG_OP(ApplyFtrlV2D) .INPUT(var, TensorType::NumberType()) @@ -1890,9 +1857,6 @@ REG_OP(ApplyAdam) *@par Third-party framework compatibility *Compatible with the TensorFlow operator ApplyAdam. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdam instead. */ REG_OP(ApplyAdamD) .INPUT(var, TensorType::NumberType()) @@ -1981,9 +1945,6 @@ REG_OP(ApplyAdadelta) *@par Third-party framework compatibility * Compatible with the TensorFlow operator ApplyAdadelta. - -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdadelta instead. 
*/ REG_OP(ApplyAdadeltaD) .INPUT(var, TensorType::NumberType()) diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index 4f42008e..567bc63d 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -65,9 +65,6 @@ REG_OP(Fill) * *@par Outputs: * y: A tensor. Has the same type as "value". -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use Fill instead. */ REG_OP(FillD) .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, @@ -125,9 +122,6 @@ REG_OP(BroadcastTo) * *@par Third-party framework compatibility *Compatible with the TensorFlow operator BroadcastTo. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use BroadcastTo instead. */ REG_OP(BroadcastToD) .INPUT(x, TensorType::BasicType()) @@ -175,9 +169,6 @@ REG_OP(Pad) *@par Third-party framework compatibility: * Compatible with TensorFlow operator Pad. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. */ REG_OP(PadD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) @@ -269,9 +260,6 @@ REG_OP(PadV3D) *@see Diag() *@par Third-party framework compatibility * Compatible with the TensorFlow operator Diag. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use Diag instead. */ REG_OP(DiagD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) diff --git a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h index cd6cfdfe..ec88c618 100644 --- a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h @@ -30,7 +30,7 @@ namespace ge { *@par Inputs: *Two inputs, including: *@li rt_nested_splits: A list of at least 1 Tensor objects with the same type -in: int32, int64. The row_splits for the RaggedTensor. It's a dynamic input. +in: int32, int64. 
The row_splits for the RaggedTensor. *@li rt_dense_values: A Tensor. The flat_values for the RaggedTensor Must be one of the following types: bool, int8, int16, uint16, int32, int64, double, float, float16 . \n @@ -66,7 +66,7 @@ REG_OP(RaggedTensorToSparse) *@li values:A 1D tensor representing the values of the ragged tensor. *@li default_value:A `Tensor`. Must have the same type as `values`. *@li row_partition_tensors:A list of at least 1 `Tensor` objects with the same -type in: `int64`, `int32` . It's a dynamic input.\n +type in: `int64`, `int32` .\n *@par Attributes: *@li num_row_partition_tensors:Numbers of row partition tensors. diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index b97d824f..24a9edd1 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -374,9 +374,6 @@ REG_OP(DropOutGenMask) *@par Third-party framework compatibility * Compatible with the TensorFlow operator lin_space. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use LinSpace instead. */ REG_OP(LinSpaceD) .INPUT(assist, TensorType({DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 626dda59..80169344 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -353,9 +353,6 @@ REG_OP(ReduceSum) *@par Third-party framework compatibility * Compatible with the TensorFlow operator Sum. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceSum instead. */ REG_OP(ReduceSumD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -381,9 +378,6 @@ REG_OP(ReduceSumD) *@par Third-party framework compatibility * Compatible with the TensorFlow operator ReduceAll. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceAll instead. 
*/ REG_OP(ReduceAllD) .INPUT(x, TensorType({DT_BOOL})) @@ -459,9 +453,6 @@ REG_OP(ReduceProd) *@par Third-party framework compatibility * Compatible with the TensorFlow operator ReduceProd. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceProd instead. */ REG_OP(ReduceProdD) .INPUT(x,TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16})) @@ -516,9 +507,6 @@ REG_OP(ReduceMean) *@par Third-party framework compatibility: * Compatible with the TensorFlow operator ReduceMean. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMean instead. */ REG_OP(ReduceMeanD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -573,9 +561,6 @@ REG_OP(ReduceMax) *@par Third-party framework compatibility * Compatible with TensorFlow operator Max. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMax instead. */ REG_OP(ReduceMaxD) .INPUT(x, TensorType({DT_FLOAT, DT_UINT8, DT_INT8, @@ -630,9 +615,6 @@ REG_OP(ReduceMin) *@par Third-party framework compatibility * Compatible with the TensorFlow operator reduce_min. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead. */ REG_OP(ReduceMinD) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) @@ -699,9 +681,6 @@ REG_OP(ReduceAny) * *@par Third-party framework compatibility *Compatible with the TensorFlow operator reduce_any. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceAny instead. */ REG_OP(ReduceAnyD) .INPUT(x, TensorType({DT_BOOL})) @@ -787,9 +766,6 @@ REG_OP(EuclideanNorm) *@par Third-party framework compatibility * Compatible with the TensorFlow operator EuclideanNorm. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use EuclideanNorm instead. 
*/ REG_OP(EuclideanNormD) .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16})) diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index e1a83f43..0766d2c6 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -92,6 +92,7 @@ REG_OP(DynamicLSTM) .OUTPUT(output_h, TensorType({DT_FLOAT32})) .OP_END_FACTORY_REG(DynamicLSTM) + /** *@brief: DynamicRNNGrad calculation. *@par Inputs: @@ -126,7 +127,7 @@ REG_OP(DynamicLSTM) *@li keep_prob:An float identifying the keep prob in the op. Default to 1. *@li cell_clip:An float identifying the cell clip in the op. Default to -1. *@li num_proj:An integer identifying the num projection in the op. Default to 0. -*@li time_major:An bool identifying the time major in the op. Default to false. +*@li time_major:An bool identifying the time major in the op. Default to true. *@li activation:An string identifying the type of activation function in the op. Default to "tanh". Only tanh is currently supported. *@li forget_bias:An float identifying the forget bias in the op. Default to 0. *@li is_training:An bool identifying is training in the op. Default to true. @@ -138,6 +139,9 @@ REG_OP(DynamicLSTM) *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dwci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dwcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dwco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. 
*/ REG_OP(DynamicRNNGrad) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/save_ops.h b/third_party/fwkacllib/inc/ops/save_ops.h index 159e7382..7fd853d3 100644 --- a/third_party/fwkacllib/inc/ops/save_ops.h +++ b/third_party/fwkacllib/inc/ops/save_ops.h @@ -28,7 +28,7 @@ namespace ge { /** *@brief Mark which tensors need to be saved to the ckpt file. *@par Inputs: -*tensors: A list of input tensor.It's a dynamic input. +*tensors: A list of input tensors. *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ diff --git a/third_party/fwkacllib/inc/ops/sdca_ops.h b/third_party/fwkacllib/inc/ops/sdca_ops.h index dc6852d4..acf1c34d 100644 --- a/third_party/fwkacllib/inc/ops/sdca_ops.h +++ b/third_party/fwkacllib/inc/ops/sdca_ops.h @@ -35,16 +35,16 @@ namespace ge { *rate . \n *@par Inputs: -*@li sparse_example_indices: a list of vectors which contain example indices.It's a dynamic input. -*@li sparse_feature_indices: a list of vectors which contain feature indices.It's a dynamic input. -*@li sparse_feature_values: a list of vectors which contains feature value associated with each feature group.It's a dynamic input. -*@li dense_features: a list of matrices which contains the dense feature values.It's a dynamic input. +*@li sparse_example_indices: a list of vectors which contain example indices. +*@li sparse_feature_indices: a list of vectors which contain feature indices. +*@li sparse_feature_values: a list of vectors which contain the feature values associated with each feature group. +*@li dense_features: a list of matrices which contain the dense feature values. *@li example_weights: a vector which contains the weight associated with each example. *@li example_labels: a vector which contains the label/target associated with each example. *@li sparse_indices: a list of vectors where each value is the indices which has -*corresponding weights in sparse_weights. This field maybe omitted for the dense approach.
This field maybe omitted for the dense approach.It's a dynamic input. +*corresponding weights in sparse_weights. This field maybe omitted for the dense approach. *@li sparse_weights: a list of vectors where each value is the weight associated with a sparse feature group. -*@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group.It's a dynamic input. +*@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group. *@li example_state_data: a list of vectors containing the example state data. *@li loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, squared and hinge losses. *@li l1: Symmetric l1 regularization strength. @@ -61,7 +61,6 @@ namespace ge { *@par Third-party framework compatibility * Compatible with tensorflow SdcaOptimizerV2 operator. */ - REG_OP(SdcaOptimizerV2) .DYNAMIC_INPUT(sparse_example_indices, TensorType({DT_INT64})) .DYNAMIC_INPUT(sparse_feature_indices, TensorType({DT_INT64})) diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 613ce358..8ef4a42c 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -79,9 +79,6 @@ REG_OP(Range) *@see Range() *@since V100R001C33 -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use Range instead. */ REG_OP(RangeD) .INPUT(x, TensorType({DT_FLOAT,DT_INT32})) @@ -186,7 +183,8 @@ REG_OP(GatherNd) * uint8, int16, int8, int64, qint8, quint8, qint32, qint16, quint16, * uint16, complex128, float16, uint32, uint64, complex64, complex128. * @li indices: A Tensor of type int32 or int64. -* @li axis: A Tensor of type as int32 . \n +* @li axis: A Tensor of type as int32 or int64, +* Must be in the range [-rank(input_tensor), rank(input_tensor)) . \n *@par Outputs: *y: A Tensor. Has the same type as "x" . 
\n @@ -225,9 +223,6 @@ REG_OP(GatherV2) *@par Third-party framework compatibility * Compatible with the TensorFlow operator GatherV2. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use GatherV2 instead. */ REG_OP(GatherV2D) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT32, DT_INT8, DT_UINT8, @@ -330,9 +325,6 @@ REG_OP(StridedSlice) *@par Third-party framework compatibility * Compatible with the TensorFlow operator StridedSlice. - -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSlice instead. */ REG_OP(StridedSliceD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8, @@ -388,9 +380,6 @@ REG_OP(StridedSliceD) *@par Third-party framework compatibility * Compatible with the TensorFlow operator StridedSliceGradD. - -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSliceGrad instead. */ REG_OP(StridedSliceGradD) .INPUT(dy, TensorType::BasicType()) @@ -502,9 +491,6 @@ REG_OP(UnsortedSegmentSum) *@par Third-party framework compatibility * Compatible with the TensorFlow operator UnsortedSegmentSum. - -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use UnsortedSegmentSum instead. */ REG_OP(UnsortedSegmentSumD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_UINT8})) @@ -729,9 +715,6 @@ REG_OP(OneHot) *@par Third-party framework compatibility: * Compatible with the TensorFlow operator OneHot. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use OneHot instead. */ REG_OP(OneHotD) .INPUT(x, TensorType({DT_UINT8, DT_INT32})) @@ -807,7 +790,7 @@ REG_OP(SliceD) * @li assist_seq: A 1D tensor of type float16. * with size of 2N, which "N" is the last dimension. * The first N numbers is indices, and the next N numbers is deviation of casting -* int32 to float16. \n +* float16 to int32 . 
\n * @par Attributes: * @li k: A required int that is at least 0, specifying the number of top elements @@ -816,7 +799,7 @@ REG_OP(SliceD) * If true, the resulting "k" elements will be sorted by the values in descending * order. * @li dim: An optional int. Defaults to -1. For reserved use. -* @li largest: An optional bool. Defaults to true. For reserved use. \n +* @li largest: An optional bool. Defaults to true. For reserved use. * @par Outputs: * @li values: A Tensor, specifying the sorted data. Has the same type as "input". @@ -1270,9 +1253,6 @@ REG_OP(InplaceUpdate) *@par Third-party framework compatibility *Compatible with the TensorFlow operator InplaceUpdate. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceUpdate instead. */ REG_OP(InplaceUpdateD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -1325,9 +1305,6 @@ REG_OP(InplaceAdd) *@par Third-party framework compatibility *Compatible with the TensorFlow operator InplaceAdd. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceAdd instead. */ REG_OP(InplaceAddD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -1379,9 +1356,6 @@ REG_OP(InplaceSub) *@par Third-party framework compatibility *Compatible with the TensorFlow operator InplaceSub. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceSub instead. */ REG_OP(InplaceSubD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -1433,9 +1407,6 @@ REG_OP(ScatterNonAliasingAdd) * @par Outputs: * y: A Tensor of type RealNumberType . \n -* @attention Constraints: -* @li segment_ids must be non-negative tensor. - * @see UnsortedSegmentSum(), UnsortedSegmentProd(), * @par Third-party framework compatibility @@ -1463,9 +1434,6 @@ REG_OP(UnsortedSegmentMin) * @par Outputs: * y: A Tensor.Must have the same type as input "x" . \n -* @attention Constraints: -* @li segment_ids must be non-negative tensor. 
- * @see UnsortedSegmentProdD(), UnsortedSegmentSumD(), * * @par Restrictions: @@ -1491,9 +1459,6 @@ REG_OP(UnsortedSegmentMinD) * @par Outputs: * y: A Tensor of type RealNumberType . \n -* @attention Constraints: -* @li segment_ids must be non-negative tensor. - * @see UnsortedSegmentSum(), UnsortedSegmentProd(), * @par Third-party framework compatibility @@ -1521,9 +1486,6 @@ REG_OP(UnsortedSegmentMax) * @par Outputs: * y: A Tensor.Must have the same type as input "x" . \n -* @attention Constraints: -* @li segment_ids must be non-negative tensor. - * @see UnsortedSegmentProdD(), * * @par Restrictions: @@ -1548,9 +1510,6 @@ REG_OP(UnsortedSegmentMaxD) * @par Outputs: * y: A Tensor of type NumberType . \n -* @attention Constraints: -* @li segment_ids must be non-negative tensor. - * @see UnsortedSegmentSum(), UnsortedSegmentMin(), * @par Third-party framework compatibility @@ -1582,9 +1541,6 @@ REG_OP(UnsortedSegmentProd) * @li segment_ids must be non-negative tensor. * @see UnsortedSegmentMinD() -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use UnsortedSegmentProd instead. */ REG_OP(UnsortedSegmentProdD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) @@ -1900,9 +1856,6 @@ REG_OP(CumulativeLogsumexp) *y: A Tensor. Has the same type as "x". *@par Third-party framework compatibility * Compatible with the TensorFlow operator Cumsum. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use CumulativeLogsumexp instead. */ REG_OP(CumulativeLogsumexpD) .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16})) diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h index b0bd14c0..b66a0213 100644 --- a/third_party/fwkacllib/inc/ops/split_combination_ops.h +++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h @@ -75,9 +75,6 @@ REG_OP(Split) *@par Third-party framework compatibility * Compatible with the TensorFlow operator Split. 
- -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use Split instead. */ REG_OP(SplitD) .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, @@ -144,9 +141,6 @@ Under the caffe framework, the conversion of slice_point through the cut point t Under the caffe framework,size_splits or axis transformat to split_dim.Only one can effect. *@par Third-party framework compatibility * Compatible with the TensorFlow operator SplitV. - -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use SplitV instead. */ REG_OP(SplitVD) .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, @@ -164,8 +158,7 @@ REG_OP(SplitVD) * Two inputs, including: * @li values: A list of Tensors. Must be one of the following types: int8, int16, int32, * int64, uint8, uint16, uint32, uint64, float16, float32. -* Tensors to be concatenated. All must have size 1 in the first dimension and same shape. -* It's a dynamic input. +* Tensors to be concatenated. All must have size 1 in the first dimension and same shape. * @li shape: A Tensor of the same type as "x". * The final shape of the result. Should be equal to the shapes of any input * but with the number of input values in the first dimension . \n @@ -314,7 +307,7 @@ REG_OP(Concat) *@par Inputs: * x: A list of N Tensors. Must be one of the following types: int8, int16, int32, -* int64, uint8, uint16, uint32, uint64, float16, float32, bool . It's a dynamic input. \n +* int64, uint8, uint16, uint32, uint64, float16, float32, bool . \n *@par Attributes: *@li axis: A optional int, defaultvalue is 0. @@ -340,7 +333,7 @@ REG_OP(Pack) *@par Inputs: *Two inputs, including: * @li concat_dim: A Tensor of type int32. -* @li x: A list of 1D Tensor objects of type int32 . It's a dynamic input. \n +* @li x: A list of 1D Tensor objects of type int32 . \n *@par Attributes: *N: A required int . 
\n @@ -364,7 +357,7 @@ REG_OP(ConcatOffset) *@par Inputs: *Two inputs, including: * @li concat_dim: A Tensor of type int32. -* @li x: A list of 1D Tensor objects of type int32 . It's a dynamic input. \n +* @li x: A list of 1D Tensor objects of type int32 . \n *@par Attributes: *@li Concat_dim: A required int. Must be within the rank of input "x". diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index edc55820..ed46d95c 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -235,12 +235,8 @@ REG_OP(BatchToSpaceND) *@par Outputs: *y: A Tensor with format NC1HWC0. Has the same type as input "x". - *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchToSpaceND. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpaceND instead. */ REG_OP(BatchToSpaceNDD) .INPUT(x, TensorType::BasicType()) @@ -287,9 +283,6 @@ REG_OP(SpaceToBatchND) *@par Third-party framework compatibility * Compatible with the TensorFlow operator SpaceToBatchND. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use SpaceToBatchND instead. */ REG_OP(SpaceToBatchNDD) .INPUT(x, TensorType::BasicType()) @@ -411,9 +404,6 @@ REG_OP(BatchToSpace) *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchToSpace. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpace instead. */ REG_OP(BatchToSpaceD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, @@ -467,9 +457,6 @@ REG_OP(SpaceToBatch) *y: A Tensor. Has the same type as input "x". *@par Third-party framework compatibility *@ Compatible with the TensorFlow operator SpaceToBatch. -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use SpaceToBatch instead. 
*/ REG_OP(SpaceToBatchD) .INPUT(x, TensorType::BasicType()) @@ -598,9 +585,6 @@ REG_OP(ExtractVolumePatches) *@par Outputs: *y: A Tensor. Has the same type as "x". -* -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ConfusionTranspose instead. */ REG_OP(ConfusionTransposeD) .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, @@ -664,11 +648,6 @@ REG_OP(FlattenV2) .ATTR(end_axis, Int, -1) .OP_END_FACTORY_REG(FlattenV2) -REG_OP(DeConvTrans) - .INPUT(x, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_INT8})) - .OP_END_FACTORY_REG(DeConvTrans) - /** *@brief Compress large weight to small one. Usually inserted before Conv2d. * diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 4b08916e..17243802 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -19,7 +19,7 @@ #include -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -580,7 +580,8 @@ RTS_API rtError_t rtLabelListCpy(rtLabel_t *label, uint32_t labelNumber, void *d * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream); -#ifdef __cplusplus + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index c64ed16f..6de84c02 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -19,7 +19,7 @@ #include "base.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -185,7 +185,7 @@ RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType); */ RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git 
a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index cc74a5ed..39651817 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -19,7 +19,7 @@ #include "base.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -149,7 +149,7 @@ RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t* groupInfo, uint */ RTS_API rtError_t rtGetGroupCount(uint32_t *count); -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index 048be69a..0bff548b 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -19,7 +19,7 @@ #include "base.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -339,7 +339,7 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3 * @return RT_ERROR_NONE for ok */ RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value); -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/dvfsprofile.h b/third_party/fwkacllib/inc/runtime/dvfsprofile.h index 60f400b3..e27cd832 100644 --- a/third_party/fwkacllib/inc/runtime/dvfsprofile.h +++ b/third_party/fwkacllib/inc/runtime/dvfsprofile.h @@ -19,7 +19,7 @@ #include "base.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -56,7 +56,7 @@ RTS_API rtError_t rtUnsetDvfsProfile(); */ RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode); -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index 
9dc44766..af7b16d8 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -19,7 +19,7 @@ #include "base.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -229,7 +229,7 @@ RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffs */ RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num); -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index 956e033b..2030634a 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -20,7 +20,7 @@ #include "base.h" #include "stream.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -529,7 +529,7 @@ RTS_API rtError_t rtStopOnlineProf(rtStream_t stream); * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtGetOnlineProfData(rtStream_t stream, rtProfDataInfo_t *pProfData, uint32_t profDataNum); -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index 8c1a4326..a506e94a 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -24,7 +24,7 @@ #include "config.h" #include "stream.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -491,7 +491,7 @@ RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num); */ RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream); -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index 
089a90b7..59a1ba7d 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -19,7 +19,7 @@ #include "base.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -430,7 +430,7 @@ rtError_t rtDebugRegister(rtModel_t model, uint32_t flag, const void *addr, uint */ RTS_API rtError_t rtDebugUnRegister(rtModel_t model); -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index 3123c3a9..ab542d89 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -20,7 +20,7 @@ #include "base.h" #include "event.h" -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) extern "C" { #endif @@ -188,7 +188,7 @@ RTS_API rtError_t rtStreamActive(rtStream_t active_stream, rtStream_t stream); */ RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtStream_t *trueStreamPtr, uint32_t elementSize, rtStream_t stream, rtSwitchDataType_t dataType); -#ifdef __cplusplus +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h index a1c39a51..67adecd9 100644 --- a/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h +++ b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h @@ -1,12 +1,18 @@ /** -* @file adx_datadump_server.h -* -* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
-*/ + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #ifndef ADX_DATADUMP_SERVER_H #define ADX_DATADUMP_SERVER_H diff --git a/third_party/patch/securec/securec.patch001 b/third_party/patch/securec/securec.patch001 index 666f28ce..01c2d769 100644 --- a/third_party/patch/securec/securec.patch001 +++ b/third_party/patch/securec/securec.patch001 @@ -1,5 +1,5 @@ -diff -Npur -x .git bounds_checking_function/CMakeLists.txt securec/CMakeLists.txt ---- bounds_checking_function/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 +diff -Npur -x .git libboundscheck/CMakeLists.txt securec/CMakeLists.txt +--- libboundscheck/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 +++ securec/CMakeLists.txt 2020-09-19 16:53:48.689460700 +0800 @@ -0,0 +1,18 @@ +cmake_minimum_required(VERSION 3.14) From 0667dcfa3fdd763102aadf0ad95dbb97667fb598 Mon Sep 17 00:00:00 2001 From: zhoufeng Date: Wed, 14 Oct 2020 18:38:08 +0800 Subject: [PATCH 5/7] handle empty label resource Signed-off-by: zhoufeng --- src/ge/ge_runtime/runtime_model.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ge/ge_runtime/runtime_model.cc b/src/ge/ge_runtime/runtime_model.cc index f0405056..9f549313 100644 --- a/src/ge/ge_runtime/runtime_model.cc +++ b/src/ge/ge_runtime/runtime_model.cc @@ -344,6 +344,9 @@ void RuntimeModel::RtStreamDestory() noexcept { void RuntimeModel::RtLabelDestory() noexcept { for (size_t i = 0; i < label_list_.size(); i++) 
{ + if (label_list_[i] == nullptr) { + continue; + } if (rtLabelDestroy(label_list_[i]) != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Destroy label failed! Index: %zu.", i); return; From 1f4f0a47b1aec2d32435431d6faf0de2ab81a25b Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Thu, 22 Oct 2020 09:38:44 +0800 Subject: [PATCH 6/7] speed up third party download via domestic source --- cmake/external_libs/eigen.cmake | 13 +++++++++++-- cmake/external_libs/gtest.cmake | 12 ++++++++++-- cmake/external_libs/json.cmake | 17 ++++++++++++++--- cmake/external_libs/onnx.cmake | 12 ++++++++++-- cmake/external_libs/protobuf.cmake | 13 +++++++++++-- 5 files changed, 56 insertions(+), 11 deletions(-) diff --git a/cmake/external_libs/eigen.cmake b/cmake/external_libs/eigen.cmake index b43e70b4..5cdfc346 100644 --- a/cmake/external_libs/eigen.cmake +++ b/cmake/external_libs/eigen.cmake @@ -1,10 +1,19 @@ set(Eigen3_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2") set(Eigen3_CFLAGS "-D_FORTIFY_SOURCE=2 -O2") set(Eigen3_NS "ge_") + +if (ENABLE_GITEE) + set(REQ_URL "https://gitee.com/mirrors/eigen-git-mirrorsource/repository/archive/3.3.7.tar.gz") + set(MD5 "cf6552a5d90c1aca4b5e0b011f65ea93") +else() + set(REQ_URL "https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.gz") + set(MD5 "9e30f67e8531477de4117506fe44669b") +endif () + graphengine_add_pkg(Eigen3 VER 3.3.7 - URL https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.gz - MD5 9e30f67e8531477de4117506fe44669b + URL ${REQ_URL} + MD5 ${MD5} CMAKE_OPTION -DBUILD_TESTING=OFF) find_package(Eigen3 3.3.7 REQUIRED ${GE_FIND_NO_DEFAULT_PATH}) diff --git a/cmake/external_libs/gtest.cmake b/cmake/external_libs/gtest.cmake index e6d1ab1d..5e175fd2 100644 --- a/cmake/external_libs/gtest.cmake +++ b/cmake/external_libs/gtest.cmake @@ -1,11 +1,19 @@ set(ge_gtest_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack") set(ge_gtest_CFLAGS "-D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all 
-Wl,-z,relro,-z,now,-z,noexecstack") +if (ENABLE_GITEE) + set(REQ_URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.8.0.tar.gz") + set(MD5 "89e13ca1aa48d370719d58010b83f62c") +else() + set(REQ_URL "https://github.com/google/googletest/archive/release-1.8.0.tar.gz") + set(MD5 "16877098823401d1bf2ed7891d7dce36") +endif () + graphengine_add_pkg(ge_gtest VER 1.8.0 LIBS gtest gtest_main - URL https://github.com/google/googletest/archive/release-1.8.0.tar.gz - MD5 16877098823401d1bf2ed7891d7dce36 + URL ${REQ_URL} + MD5 ${MD5} CMAKE_OPTION -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON) diff --git a/cmake/external_libs/json.cmake b/cmake/external_libs/json.cmake index 4b9fa4e3..f2ae5310 100644 --- a/cmake/external_libs/json.cmake +++ b/cmake/external_libs/json.cmake @@ -1,9 +1,20 @@ set(nlohmann_json_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2") set(nlohmann_json_CFLAGS "-D_FORTIFY_SOURCE=2 -O2") + +if (ENABLE_GITEE) + set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip") + set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7") + set(INCLUDE "./include") +else() + set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip") + set(MD5 "0dc903888211db3a0f170304cd9f3a89") + set(INCLUDE "./") +endif () + graphengine_add_pkg(ge_nlohmann_json VER 3.6.1 - HEAD_ONLY ./ - URL https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip - MD5 0dc903888211db3a0f170304cd9f3a89) + HEAD_ONLY ${INCLUDE} + URL ${REQ_URL} + MD5 ${MD5}) include_directories(${ge_nlohmann_json_INC}) add_library(graphengine::json ALIAS ge_nlohmann_json) \ No newline at end of file diff --git a/cmake/external_libs/onnx.cmake b/cmake/external_libs/onnx.cmake index 621f67c6..a092f964 100644 --- a/cmake/external_libs/onnx.cmake +++ b/cmake/external_libs/onnx.cmake @@ -1,5 +1,13 @@ +if (ENABLE_GITEE) + set(REQ_URL 
"https://gitee.com/mirrors/ONNX/repository/archive/v1.6.0.tar.gz") + set(MD5 "1bdbcecdd68ea8392630467646776e02") +else() + set(REQ_URL "https://github.com/onnx/onnx/releases/download/v1.6.0/onnx-1.6.0.tar.gz") + set(MD5 "512f2779d6215d4a36f366b6b9acdf1e") +endif () + graphengine_add_pkg(onnx VER 1.6.0 HEAD_ONLY ./ - URL https://github.com/onnx/onnx/releases/download/v1.6.0/onnx-1.6.0.tar.gz - MD5 512f2779d6215d4a36f366b6b9acdf1e) \ No newline at end of file + URL ${REQ_URL} + MD5 ${MD5}) diff --git a/cmake/external_libs/protobuf.cmake b/cmake/external_libs/protobuf.cmake index bbd86bc4..8be594c7 100644 --- a/cmake/external_libs/protobuf.cmake +++ b/cmake/external_libs/protobuf.cmake @@ -5,12 +5,21 @@ set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") set(_ge_tmp_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) string(REPLACE " -Wall" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REPLACE " -Werror" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + +if (ENABLE_GITEE) + set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz") + set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236") +else() + set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz") + set(MD5 "3d9e32700639618a4d2d342c99d4507a") +endif () + graphengine_add_pkg(protobuf VER 3.8.0 LIBS protobuf EXE protoc - URL https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz - MD5 3d9e32700639618a4d2d342c99d4507a + URL ${REQ_URL} + MD5 ${MD5} CMAKE_PATH ../cmake/ CMAKE_OPTION -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_BUILD_SHARED_LIBS=OFF) set(CMAKE_CXX_FLAGS ${_ge_tmp_CMAKE_CXX_FLAGS}) From b25b70c9c2e6d09aba0815234fbe76f7e9b28d43 Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Fri, 23 Oct 2020 20:08:45 +0800 Subject: [PATCH 7/7] add stub files --- src/common/graph/stub/Makefile | 6 + src/common/graph/stub/gen_stubapi.py | 578 +++++++++++++++++++++++++++++++++++ src/ge/stub/Makefile | 6 + src/ge/stub/README | 4 + src/ge/stub/README.md | 44 +++ src/ge/stub/gen_stubapi.py | 
578 +++++++++++++++++++++++++++++++++++ 6 files changed, 1216 insertions(+) create mode 100644 src/common/graph/stub/Makefile create mode 100644 src/common/graph/stub/gen_stubapi.py create mode 100644 src/ge/stub/Makefile create mode 100644 src/ge/stub/README create mode 100755 src/ge/stub/README.md create mode 100644 src/ge/stub/gen_stubapi.py diff --git a/src/common/graph/stub/Makefile b/src/common/graph/stub/Makefile new file mode 100644 index 00000000..f339fa33 --- /dev/null +++ b/src/common/graph/stub/Makefile @@ -0,0 +1,6 @@ +inc_path := $(shell pwd)/metadef/inc/external/ +out_path := $(shell pwd)/out/graph/lib64/stub/ +stub_path := $(shell pwd)/metadef/graph/stub/ + +mkdir_stub := $(shell mkdir -p $(out_path)) +graph_local_stub := $(shell $(HI_PYTHON) $(stub_path)/gen_stubapi.py $(inc_path) $(out_path)) diff --git a/src/common/graph/stub/gen_stubapi.py b/src/common/graph/stub/gen_stubapi.py new file mode 100644 index 00000000..7263ff17 --- /dev/null +++ b/src/common/graph/stub/gen_stubapi.py @@ -0,0 +1,578 @@ +import os +import re +import sys +import logging + +logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(levelname)s: %(message)s', + level=logging.INFO) + +""" + this attr is used for symbol table visible +""" +GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY' + +""" + generate stub func body by return type +""" +RETURN_STATEMENTS = { + 'graphStatus': ' std::cout << "[ERROR]: stub library libgraph or libge_compiler cannot be used for execution, please check your "\n ' + ' << "environment variables and compilation options to make sure you use the correct library."\n' + ' << std::endl;\n' + ' return ACL_ERROR_COMPILING_STUB_MODE;', + 'Status': ' return SUCCESS;', + 'Graph': ' return Graph();', + 'Graph&': ' return *this;', + 'Format': ' return Format();', + 'Format&': ' return *this;', + 'Shape': ' return Shape();', + 'Shape&': ' return *this;', + 'TensorDesc': ' return TensorDesc();', + 'TensorDesc&': ' return *this;', + 
'Tensor': ' return Tensor();', + 'Tensor&': ' return *this;', + 'Operator': ' return Operator();', + 'Operator&': ' return *this;', + 'Ptr': ' return nullptr;', + 'std::string': ' return "";', + 'std::string&': ' return "";', + 'string': ' return "";', + 'int': ' return 0;', + 'DataType': ' return DT_FLOAT;', + 'InferenceContextPtr': ' return nullptr;', + 'SubgraphBuilder': ' return nullptr;', + 'OperatorImplPtr': ' return nullptr;', + 'OutHandler': ' return nullptr;', + 'std::vector': ' return {};', + 'std::vector': ' return {};', + 'std::map': ' return {};', + 'uint32_t': ' return 0;', + 'int64_t': ' return 0;', + 'uint64_t': ' return 0;', + 'size_t': ' return 0;', + 'float': ' return 0.0f;', + 'bool': ' return false;', +} + +""" + max code len per line in hua_wei software programming specifications +""" +max_code_len_per_line = 100 + +""" + white_list_for_debug, include_dir_key_words is to + determines which header files to generate cc files from + when DEBUG on +""" +white_list_for_debug = ["attr_value.h", "operator.h", "tensor.h", "graph.h", "operator_factory.h", "inference_context.h", + "ge_ir_build.h", "ge_api.h", "ascend_string.h", "gnode.h"] +include_dir_key_words = ["ge", "graph"] +DEBUG = True + + +def need_generate_func(func_line): + """ + :param func_line: + :return: + """ + if func_line.strip().endswith("default") or func_line.strip().endswith("delete") \ + or func_line.strip().startswith("typedef") or func_line.strip().startswith("using"): + return False + return True + + +def file_endswith_white_list_suffix(file): + """ + :param file: + :return: + """ + if DEBUG: + for suffix in white_list_for_debug: + if file.endswith(suffix): + return True + return False + else: + return True + + +""" + belows are patterns used for analyse .h file +""" +# pattern function +pattern_func = re.compile(r"""(^[\s]*) #leading with space,we will find and delete after +([a-zA-Z~_] # void int likely +.* +[)] #we find ) +(?!.*{) # we do not want the case int abc() const 
+.*) +(;.*) #we want to find ; and after for we will replace these later +\n$ +""", re.VERBOSE | re.MULTILINE | re.DOTALL) + +# pattern comment +pattern_comment = re.compile(r'^\s*//') +pattern_comment_2_start = re.compile(r'^\s*/[*]') +pattern_comment_2_end = re.compile(r'[*]/\s*$') +# pattern define +pattern_define = re.compile(r'^\s*#define') +pattern_define_return = re.compile(r'\\\s*$') +# blank line +pattern_blank_line = re.compile(r'^\s*$') +# virtual,explicit,friend,static +pattern_keyword = re.compile(r'(virtual\s+|explicit\s+|friend\s+|static\s+)') +# lead space +pattern_leading_space = re.compile(r'(^[\s]*)[a-zA-Z~_]') +# functions will have patterns such as func ( or func( +# but operator is an exception; the class name is preceded by an operator, and the above mode does not exist +# format like :"operator = ()" +pattern_func_name = re.compile(r'([a-zA-Z0-9~_\-]+\s*|operator?.*)[(]') +# template +pattern_template = re.compile(r'^\s*template') +pattern_template_end = re.compile(r'>\s*$') +# namespace +pattern_namespace = re.compile(r'namespace.*{') +# class : which can handle classA a and {not on the same line, but if found ';' after class,then don't deal with +pattern_class = re.compile(r'^[\s]*(class|struct)\s+(%s\s+)?([a-zA-Z0-9_\-]+ 0 and not friend_match: + line, func_name = self.handle_class_member_func(line, template_string) + # Normal functions + else: + line, func_name = self.handle_normal_func(line, template_string) + + need_generate = need_generate_func(line) + # func body + line += self.implement_function(line) + # comment + line = self.gen_comment(start_i) + line + # write to out file + self.write_func_content(line, func_name, need_generate) + # next loop + self.line_index += 1 + + logging.info('Added %s functions', len(self.func_list_exist)) + logging.info('Successfully converted,please see ' + self.output_file) + + def handle_func1(self, line): + """ + :param line: + :return: + """ + find1 = re.search('[(]', line) + if not find1: + 
self.line_index += 1 + return "continue", line, None + find2 = re.search('[)]', line) + start_i = self.line_index + space_match = pattern_leading_space.search(line) + # deal with + # int abc(int a, + # int b) + if find1 and (not find2): + self.line_index += 1 + line2 = self.input_content[self.line_index] + if space_match: + line2 = re.sub('^' + space_match.group(1), '', line2) + line += line2 + while self.line_index < len(self.input_content) and (not re.search('[)]', line2)): + self.line_index += 1 + line2 = self.input_content[self.line_index] + line2 = re.sub('^' + space_match.group(1), '', line2) + line += line2 + + match_start = pattern_start.search(self.input_content[self.line_index]) + match_end = pattern_end.search(self.input_content[self.line_index]) + if match_start: # like ) { or ) {} int the last line + if not match_end: + self.stack.append('normal_now') + ii = start_i + while ii <= self.line_index: + ii += 1 + self.line_index += 1 + return "continue", line, start_i + logging.info("line[%s]", line) + # ' int abc();'->'int abc()' + (line, match) = pattern_func.subn(r'\2\n', line) + logging.info("line[%s]", line) + # deal with case: + # 'int \n abc(int a, int b)' + if re.search(r'^\s*(inline)?\s*[a-zA-Z0-9_]+\s*$', self.input_content[start_i - 1]): + line = self.input_content[start_i - 1] + line + line = line.lstrip() + if not match: + self.line_index += 1 + return "continue", line, start_i + return "pass", line, start_i + + def handle_stack(self, match_start): + """ + :param match_start: + :return: + """ + line = self.input_content[self.line_index] + match_end = pattern_end.search(line) + if match_start: + self.stack.append('normal_now') + if match_end: + top_status = self.stack.pop() + if top_status == 'namespace_now': + self.output_fd.write(line + '\n') + elif top_status == 'class_now': + self.stack_class.pop() + self.stack_template.pop() + if match_start or match_end: + self.line_index += 1 + return "continue" + + if len(self.stack) > 0 and 
def handle_class(self, template_string, line, match_start, match_class):
    """
    Enter a class/struct scope when the current line is a class header.

    :param template_string: template prefix collected for this class ('' when none)
    :param line: text of the current input line
    :param match_start: truthy when the current line already opens the class body
    :param match_class: match object of the class pattern (group 3 is the class name), or None
    :return: "continue" when a class header was consumed (``self.line_index`` then
             points past the line that opens the body), otherwise "pass"
    """
    if not match_class:
        return "pass"

    # we face a class
    self.stack_template.append(template_string)
    self.stack.append('class_now')
    class_name = match_class.group(3)

    if '<' in class_name:
        # class template specialization: append the text up to the '>' that
        # balances the first '<' of the line
        open_pos = line.index('<')
        depth = 1
        for pos in range(open_pos + 1, len(line)):
            char = line[pos]
            if char == '<':
                depth += 1
            if char == '>':
                depth -= 1
            if depth == 0:
                break
        class_name += line[open_pos + 1:pos + 1]
    logging.info('class_name[%s]', class_name)
    self.stack_class.append(class_name)

    # move forward to the line that opens the class body
    while not match_start:
        self.line_index += 1
        line = self.input_content[self.line_index]
        match_start = pattern_start.search(line)
    self.line_index += 1
    return "continue"
def handle_class_member_func(self, line, template_string):
    """
    Turn a member-function declaration into an out-of-class definition header.

    The function name is qualified with the innermost class on ``self.stack_class``
    (plus that class's template parameter list when the class is a non-specialized
    template), default arguments are removed, and the class-level and
    function-level template prefixes are put in front.

    :param line: declaration text as produced by handle_func1
    :param template_string: template prefix of the member function itself ('' when none)
    :return: tuple ``(line, func_name)``; ``func_name`` is the text of ``line`` up to
             its last ')'
    """
    def strip_defaults(text):
        # remove the ' = value' default arguments from a template parameter list
        text = re.sub(r'\s*=.*>(\s*)$', r'>\1', text)
        text = re.sub(r'\s*=.*,', ',', text)
        return re.sub(r'\s*=.*', '', text)

    template_line = ''
    x = ''
    if template_string != '':
        template_string = strip_defaults(re.sub(r'\s*template', 'template', template_string))

    # fix: the original read a bare, undefined 'stack_template' here, which raised
    # NameError for every member function of a templated class
    class_template = self.stack_template[-1]
    class_name = self.stack_class[-1]
    # an empty parameter list ('template <>') contributes no prefix
    if class_template != '' and not re.search(r'<\s*>', class_template):
        template_line = re.sub(r'^\s*template', 'template', class_template)
        if not re.search(r'<.*>', class_name):
            # build the argument list that follows the class name, i.e. the
            # parameter names of the class template without keywords or defaults
            x = re.sub(r'template\s*<', '<', template_line)
            x = re.sub(r'\n', '', x)
            x = re.sub(r'\s*=.*,', ',', x)
            x = re.sub(r'\s*=.*\>', '>', x)
            x = x.rstrip()
            # NOTE(review): the second alternative of this pattern looks like it lost
            # angle-bracketed text; kept exactly as found - confirm against upstream
            x = re.sub(r'(class|typename)\s+|(|\s*class)', '', x)
            x = re.sub(r'<\s+', '<', x)
            x = re.sub(r'\s+>', '>', x)
            x = re.sub(r'\s+,', ',', x)
            x = re.sub(r',\s+', ', ', x)

    # drop '= 0' and default parameter values from the declaration
    line = re.sub(r'\s*=\s+0', '', line)
    line = re.sub(r'\s*=\s+.*,', ',', line)
    line = re.sub(r'\s*=\s+.*\)', ')', line)
    logging.info("x[%s]\nline[%s]", x, line)
    # if the function is long, void ABC::foo()
    # breaks into two lines void ABC::\n foo()
    qualifier = class_name + x
    temp_line = pattern_func_name.sub(qualifier + '::' + r'\1(', line, count=1)
    if len(temp_line) > max_code_len_per_line:
        line = pattern_func_name.sub(qualifier + '::\n' + r'\1(', line, count=1)
    else:
        line = temp_line
    logging.info("line[%s]", line)
    # add template as the above if there is one
    line = strip_defaults(template_line) + template_string + line
    func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group()
    logging.info("line[%s]", line)
    logging.info("func_name[%s]", func_name)
    return line, func_name
def generate_stub_file(inc_dir, out_cc_dir):
    """
    Generate one stub .cc file for every selected header found under inc_dir.

    :param inc_dir: directory that is searched for header files
    :param out_cc_dir: output path prefix; the .cc file name is appended to it as-is
    :return: None
    """
    target_header_files, shared_includes_content = collect_header_files(inc_dir)
    for header_file in target_header_files:
        if not file_endswith_white_list_suffix(header_file):
            continue
        # foo.h -> foo.cc; replaces the regex '.h*$' (unescaped dot, 'h*') and the
        # rindex('/') lookup that raised ValueError for a path without a slash
        stem = os.path.splitext(os.path.basename(header_file))[0]
        h_2_cc = H2CC(header_file, out_cc_dir + stem + '.cc', shared_includes_content)
        h_2_cc.h2cc()
b/src/ge/stub/Makefile new file mode 100644 index 00000000..820fc70d --- /dev/null +++ b/src/ge/stub/Makefile @@ -0,0 +1,6 @@ +inc_path := $(shell pwd)/inc/external/ +out_path := $(shell pwd)/out/ge/lib64/stub/ +stub_path := $(shell pwd)/framework/domi/stub/ + +mkdir_stub := $(shell mkdir -p $(out_path)) +local_stub := $(shell $(HI_PYTHON) $(stub_path)/gen_stubapi.py $(inc_path) $(out_path)) diff --git a/src/ge/stub/README b/src/ge/stub/README new file mode 100644 index 00000000..ca98ce85 --- /dev/null +++ b/src/ge/stub/README @@ -0,0 +1,4 @@ +################################################################################### +the directory (stub) holds the stub files +gen_stubapi.py is used for retrieving the API and generating stub functions +################################################################################### diff --git a/src/ge/stub/README.md b/src/ge/stub/README.md new file mode 100755 index 00000000..a085e537 --- /dev/null +++ b/src/ge/stub/README.md @@ -0,0 +1,44 @@ +# "stub" usage: + +## Description + +- The files libge_compiler.so and libgraph.so are used by the IR build application interface. + +# Attention + +- Don't link any library other than libge_compiler.so and libgraph.so, as they may be changed in the future. + +# Usage + +## Compile: compile the application invoking the IR build API. 
+ +Makefile: + +''' + +ATC_INCLUDE_DIR := $(ASCEND_PATH)/atc/include +OPP_INCLUDE_DIR := $(ASCEND_PATH)/opp/op_proto/built-in/inc +LOCAL_MODULE_NAME := ir_build +CC := g++ +CFLAGS := -std=c++11 -g -Wall +SRCS := $(wildcard $(LOCAL_DIR)/main.cpp) +INCLUDES := -I $(ASCEND_OPP_PATH)/op_proto/built-in/inc \ + -I $(ATC_INCLUDE_DIR)/graph \ + -I $(ATC_INCLUDE_DIR)/ge \ + +LIBS := -L ${ASCEND_PATH}/atc/lib64/stub \ + -lgraph \ + -lge_compiler +ir_build: + mkdir -p out + $(CC) $(SRCS) $(INCLUDES) $(LIBS) $(CFLAGS) -o ./out/$(LOCAL_MODULE_NAME) +clean: + rm -rf out + +''' +make + +## Run the application after set the LD_LIBRARY_PATH to include the real path of the library which locates in the directory of atc/lib64 + +export LD_LIBRARY_PATH= $(ASCEND_PATH)/atc/lib64 + - ./ ir_build diff --git a/src/ge/stub/gen_stubapi.py b/src/ge/stub/gen_stubapi.py new file mode 100644 index 00000000..b6e1e70c --- /dev/null +++ b/src/ge/stub/gen_stubapi.py @@ -0,0 +1,578 @@ +import os +import re +import sys +import logging + +logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(levelname)s: %(message)s', + level=logging.INFO) + +""" + this attr is used for symbol table visible +""" +GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY' + +""" + generate stub func body by return type +""" +RETURN_STATEMENTS = { + 'graphStatus': ' std::cout << "[ERROR]: stub library libgraph or libge_compiler cannot be used for execution, please check your "\n ' + ' << "environment variables and compilation options to make sure you use the correct library."\n' + ' << std::endl;\n' + ' return ACL_ERROR_COMPILING_STUB_MODE;', + 'Status': ' return SUCCESS;', + 'Graph': ' return Graph();', + 'Graph&': ' return *this;', + 'Format': ' return Format();', + 'Format&': ' return *this;', + 'Shape': ' return Shape();', + 'Shape&': ' return *this;', + 'TensorDesc': ' return TensorDesc();', + 'TensorDesc&': ' return *this;', + 'Tensor': ' return Tensor();', + 'Tensor&': ' return *this;', + 
def need_generate_func(func_line):
    """
    Tell whether a stub body should be generated for a declaration.

    :param func_line: declaration text
    :return: False when the stripped text ends with "default" or "delete", or starts
             with "typedef" or "using"; True otherwise
    """
    stripped = func_line.strip()
    is_skipped = stripped.endswith(("default", "delete")) or stripped.startswith(("typedef", "using"))
    return not is_skipped
replace these later +\n$ +""", re.VERBOSE | re.MULTILINE | re.DOTALL) + +# pattern comment +pattern_comment = re.compile(r'^\s*//') +pattern_comment_2_start = re.compile(r'^\s*/[*]') +pattern_comment_2_end = re.compile(r'[*]/\s*$') +# pattern define +pattern_define = re.compile(r'^\s*#define') +pattern_define_return = re.compile(r'\\\s*$') +# blank line +pattern_blank_line = re.compile(r'^\s*$') +# virtual,explicit,friend,static +pattern_keyword = re.compile(r'(virtual\s+|explicit\s+|friend\s+|static\s+)') +# lead space +pattern_leading_space = re.compile(r'(^[\s]*)[a-zA-Z~_]') +# functions will have patterns such as func ( or func( +# but operator is an exception; the class name is preceded by an operator, and the above mode does not exist +# format like :"operator = ()" +pattern_func_name = re.compile(r'([a-zA-Z0-9~_\-]+\s*|operator?.*)[(]') +# template +pattern_template = re.compile(r'^\s*template') +pattern_template_end = re.compile(r'>\s*$') +# namespace +pattern_namespace = re.compile(r'namespace.*{') +# class : which can handle classA a and {not on the same line, but if found ';' after class,then don't deal with +pattern_class = re.compile(r'^[\s]*(class|struct)\s+(%s\s+)?([a-zA-Z0-9_\-]+ 0 and not friend_match: + line, func_name = self.handle_class_member_func(line, template_string) + # Normal functions + else: + line, func_name = self.handle_normal_func(line, template_string) + + need_generate = need_generate_func(line) + # func body + line += self.implement_function(line) + # comment + line = self.gen_comment(start_i) + line + # write to out file + self.write_func_content(line, func_name, need_generate) + # next loop + self.line_index += 1 + + logging.info('Added %s functions', len(self.func_list_exist)) + logging.info('Successfully converted,please see ' + self.output_file) + + def handle_func1(self, line): + """ + :param line: + :return: + """ + find1 = re.search('[(]', line) + if not find1: + self.line_index += 1 + return "continue", line, None + find2 
def handle_stack(self, match_start):
    """
    Keep the scope stack in step with the current input line.

    :param match_start: truthy when the current line matches pattern_start
    :return: "continue" when the line was consumed here (``self.line_index`` has
             been advanced), "pass" when the caller should keep processing the line
    """
    current = self.input_content[self.line_index]
    closes_scope = pattern_end.search(current)
    if match_start:
        self.stack.append('normal_now')
    if closes_scope:
        popped = self.stack.pop()
        if popped == 'class_now':
            # leaving a class: forget its name and template prefix
            self.stack_class.pop()
            self.stack_template.pop()
        elif popped == 'namespace_now':
            # the line that ends a namespace is copied to the output file
            self.output_fd.write(current + '\n')

    # lines inside a scope pushed as 'normal_now' are skipped as well
    inside_body = len(self.stack) > 0 and self.stack[-1] == 'normal_now'
    if match_start or closes_scope or inside_body:
        self.line_index += 1
        return "continue"
    return "pass"
def handle_class_member_func(self, line, template_string):
    """
    Turn a member-function declaration into an out-of-class definition header.

    The function name is qualified with the innermost class on ``self.stack_class``
    (plus that class's template parameter list when the class is a non-specialized
    template), default arguments are removed, and the class-level and
    function-level template prefixes are put in front.

    :param line: declaration text as produced by handle_func1
    :param template_string: template prefix of the member function itself ('' when none)
    :return: tuple ``(line, func_name)``; ``func_name`` is the text of ``line`` up to
             its last ')'
    """
    def strip_defaults(text):
        # remove the ' = value' default arguments from a template parameter list
        text = re.sub(r'\s*=.*>(\s*)$', r'>\1', text)
        text = re.sub(r'\s*=.*,', ',', text)
        return re.sub(r'\s*=.*', '', text)

    template_line = ''
    x = ''
    if template_string != '':
        template_string = strip_defaults(re.sub(r'\s*template', 'template', template_string))

    # fix: the original read a bare, undefined 'stack_template' here, which raised
    # NameError for every member function of a templated class
    class_template = self.stack_template[-1]
    class_name = self.stack_class[-1]
    # an empty parameter list ('template <>') contributes no prefix
    if class_template != '' and not re.search(r'<\s*>', class_template):
        template_line = re.sub(r'^\s*template', 'template', class_template)
        if not re.search(r'<.*>', class_name):
            # build the argument list that follows the class name, i.e. the
            # parameter names of the class template without keywords or defaults
            x = re.sub(r'template\s*<', '<', template_line)
            x = re.sub(r'\n', '', x)
            x = re.sub(r'\s*=.*,', ',', x)
            x = re.sub(r'\s*=.*\>', '>', x)
            x = x.rstrip()
            # NOTE(review): the second alternative of this pattern looks like it lost
            # angle-bracketed text; kept exactly as found - confirm against upstream
            x = re.sub(r'(class|typename)\s+|(|\s*class)', '', x)
            x = re.sub(r'<\s+', '<', x)
            x = re.sub(r'\s+>', '>', x)
            x = re.sub(r'\s+,', ',', x)
            x = re.sub(r',\s+', ', ', x)

    # drop '= 0' and default parameter values from the declaration
    line = re.sub(r'\s*=\s+0', '', line)
    line = re.sub(r'\s*=\s+.*,', ',', line)
    line = re.sub(r'\s*=\s+.*\)', ')', line)
    logging.info("x[%s]\nline[%s]", x, line)
    # if the function is long, void ABC::foo()
    # breaks into two lines void ABC::\n foo()
    qualifier = class_name + x
    temp_line = pattern_func_name.sub(qualifier + '::' + r'\1(', line, count=1)
    if len(temp_line) > max_code_len_per_line:
        line = pattern_func_name.sub(qualifier + '::\n' + r'\1(', line, count=1)
    else:
        line = temp_line
    logging.info("line[%s]", line)
    # add template as the above if there is one
    line = strip_defaults(template_line) + template_string + line
    func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group()
    logging.info("line[%s]", line)
    logging.info("func_name[%s]", func_name)
    return line, func_name
"&" + if RETURN_STATEMENTS.__contains__(return_type): + function_def += RETURN_STATEMENTS[return_type] + else: + logging.warning("Unhandled return type[%s]", return_type) + + function_def += '\n' + function_def += '}\n' + function_def += '\n' + return function_def + + +def collect_header_files(path): + """ + :param path: + :return: + """ + header_files = [] + shared_includes_content = [] + for root, dirs, files in os.walk(path): + files.sort() + for file in files: + if file.find("git") >= 0: + continue + if not file.endswith('.h'): + continue + file_path = os.path.join(root, file) + file_path = file_path.replace('\\', '/') + header_files.append(file_path) + include_str = '#include "{}"\n'.format(file_path[path.rindex('/') + 1:]) + shared_includes_content.append(include_str) + # for acl error code + shared_includes_content.append('#include \n') + shared_includes_content.append('const int ACL_ERROR_COMPILING_STUB_MODE = 100039;\n') + return header_files, shared_includes_content + + +def generate_stub_file(inc_dir, out_cc_dir): + """ + :param inc_dir: + :param out_cc_dir: + :return: + """ + target_header_files, shared_includes_content = collect_header_files(inc_dir) + for header_file in target_header_files: + if not file_endswith_white_list_suffix(header_file): + continue + cc_file = re.sub('.h*$', '.cc', header_file) + h_2_cc = H2CC(header_file, out_cc_dir + cc_file[cc_file.rindex('/') + 1:], shared_includes_content) + h_2_cc.h2cc() + + +def gen_code(inc_dir, out_cc_dir): + """ + :param inc_dir: + :param out_cc_dir: + :return: + """ + if not inc_dir.endswith('/'): + inc_dir += '/' + if not out_cc_dir.endswith('/'): + out_cc_dir += '/' + for include_dir_key_word in include_dir_key_words: + generate_stub_file(inc_dir + include_dir_key_word, out_cc_dir) + + +if __name__ == '__main__': + inc_dir = sys.argv[1] + out_cc_dir = sys.argv[2] + gen_code(inc_dir, out_cc_dir)