diff --git a/CMakeLists.txt b/CMakeLists.txt index 86f473e8..266ea024 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,6 +91,7 @@ else() find_library(register libregister.so ${ASCEND_RUNTIME_DIR}) find_library(resource libresource.so ${ASCEND_RUNTIME_DIR}) find_library(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) + find_library(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) endif() # add compile flags diff --git a/inc/external/ge/ge_prof.h b/inc/external/ge/ge_prof.h index dbd87966..658cea76 100644 --- a/inc/external/ge/ge_prof.h +++ b/inc/external/ge/ge_prof.h @@ -25,22 +25,11 @@ namespace ge { enum ProfDataTypeConfig { - kProfAcl = 0x0001, kProfTaskTime = 0x0002, kProfAiCoreMetrics = 0x0004, kProfAicpuTrace = 0x0008, - kProfModelExecute = 0x0010, - kProfRuntimeApi = 0x0020, - kProfRuntimeTrace = 0x0040, - kProfScheduleTimeline = 0x0080, - kProfScheduleTrace = 0x0100, - kProfAiVectorCoreMetrics = 0x0200, - kProfSubtaskTime = 0x0400, kProfTrainingTrace = 0x0800, - kProfHcclTrace = 0x1000, - kProfDataProcess = 0x2000, - kProfTaskTrace = 0x3842, - kProfModelLoad = 0x8000000000000000 + kProfHcclTrace = 0x1000 }; enum ProfilingAicoreMetrics { @@ -49,20 +38,64 @@ enum ProfilingAicoreMetrics { kAicoreSynchronization = 2, kAicoreMemory = 3, kAicoreInternalMemory = 4, - kAicoreStall = 5, - kAicoreMetricsAll = 255 // only for op_trace + kAicoreStall = 5 }; typedef struct ProfAicoreEvents ProfAicoreEvents; typedef struct aclgrphProfConfig aclgrphProfConfig; +/// +/// @ingroup AscendCL +/// @brief Initialize the profiling and set profiling configuration path +/// @param [in] profiler_path: configuration path of profiling +/// @param [in] length: length of configuration path +/// @return Status result of function +/// Status aclgrphProfInit(const char *profiler_path, uint32_t length); + +/// +/// @ingroup AscendCL +/// @brief Finalize profiling +/// @return Status result of function +/// Status aclgrphProfFinalize(); + +/// +/// @ingroup AscendCL +/// @brief Create data of type aclgrphProfConfig +/// @param [in] deviceid_list: device id list +/// @param [in] device_nums: device numbers +/// @param [in] aicore_metrics: type of aicore metrics +/// @param [in] aicore_events: pointer to aicore events be reserved, only support NULL now +/// @param [in] data_type_config: modules need profiling +/// @return Status result of function +/// aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t device_nums, ProfilingAicoreMetrics aicore_metrics, ProfAicoreEvents *aicore_events, uint64_t data_type_config); + +/// +/// @ingroup AscendCL +/// @brief Destroy data of type aclgrphProfConfig +/// @param [in] profiler_config: config of profiling +/// @return Status result of function +/// Status aclgrphProfDestroyConfig(aclgrphProfConfig *profiler_config); + +/// +/// @ingroup AscendCL +/// @brief Start profiling of modules which is configured by profiler config +/// @param [in] profiler_config: config of profiling +/// @return Status result of function +/// Status aclgrphProfStart(aclgrphProfConfig *profiler_config); + +/// +/// @ingroup AscendCL +/// @brief Stop profiling of modules which is configured by profiler config +/// @param [in] profiler_config: config of profiling +/// @return Status result of function +/// Status aclgrphProfStop(aclgrphProfConfig *profiler_config); } // namespace ge diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 9a4fd1f9..6033521c 100644 --- a/inc/framework/common/ge_types.h +++ 
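A minimal usage sketch of the aclgrphProf* interfaces declared in ge_prof.h above, chaining them in the intended init → create config → start → stop → destroy config → finalize order. The wrapper name, include path, and error handling are illustrative assumptions; only the signatures and enum values come from the header.

```cpp
#include <cstdint>
#include "ge/ge_prof.h"  // assumed include path for the external header

ge::Status ProfileDevice0(const char *prof_cfg_path, uint32_t path_len) {
  // Initialize profiling with the configuration path.
  ge::Status ret = ge::aclgrphProfInit(prof_cfg_path, path_len);
  if (ret != ge::SUCCESS) {
    return ret;
  }

  // Profile task time and AI Core metrics on device 0.
  uint32_t device_list[] = {0};
  uint64_t data_type_config = ge::kProfTaskTime | ge::kProfAiCoreMetrics;
  ge::aclgrphProfConfig *config =
      ge::aclgrphProfCreateConfig(device_list, 1, ge::kAicoreArithmaticThroughput,
                                  nullptr /* aicore_events: only NULL is supported */, data_type_config);
  if (config == nullptr) {
    (void)ge::aclgrphProfFinalize();
    return ge::FAILED;
  }

  ret = ge::aclgrphProfStart(config);
  // ... build and run graphs while profiling is active ...
  if (ret == ge::SUCCESS) {
    (void)ge::aclgrphProfStop(config);
  }

  (void)ge::aclgrphProfDestroyConfig(config);
  return ge::aclgrphProfFinalize();
}
```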
b/inc/framework/common/ge_types.h @@ -48,6 +48,8 @@ enum OpEngineType { ENGINE_AIVECTOR = 4 // not support }; +enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYNAMIC_AIPP, DYNAMIC_AIPP_NODE }; + const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 00846112..6e82bb96 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -163,6 +163,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); ge::Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); + ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); + ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector &input_desc, std::vector &output_desc); diff --git a/inc/graph/debug/ge_attr_define.h b/inc/graph/debug/ge_attr_define.h index a32907bb..7538ba6a 100644 --- a/inc/graph/debug/ge_attr_define.h +++ b/inc/graph/debug/ge_attr_define.h @@ -141,8 +141,12 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_OUTPUTS; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_DIMS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_AIPP_INPUT_DIMS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DATA_RELATED_AIPP_MODE; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DATA_AIPP_DATA_NAME_MAP; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_GRAPH_HAS_BEEN_ADDED; + GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME; diff --git a/inc/graph/runtime_inference_context.h b/inc/graph/runtime_inference_context.h index 6c6c82e7..f0b38546 100644 --- a/inc/graph/runtime_inference_context.h +++ b/inc/graph/runtime_inference_context.h @@ -23,6 +23,7 @@ #include #include "external/graph/ge_error_codes.h" #include "external/graph/tensor.h" +#include "ge_attr_value.h" namespace ge { class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY RuntimeInferenceContext { @@ -32,10 +33,12 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY RuntimeInferenceContext { static void DestroyContext(const std::string &context_id); graphStatus SetTensor(int64_t node_id, int output_id, Tensor &&tensor); + graphStatus GetTensor(int64_t node_id, int output_id, GeTensorPtr &tensor); graphStatus GetTensor(int64_t node_id, int output_id, Tensor &tensor); private: std::map> tensors_; + std::map> ge_tensors_; std::mutex mu_; static std::map> contexts_; diff --git a/src/common/graph/ge_attr_define.cc b/src/common/graph/ge_attr_define.cc index 4834c73b..cd504812 100644 --- a/src/common/graph/ge_attr_define.cc +++ b/src/common/graph/ge_attr_define.cc @@ -122,8 +122,12 @@ const std::string ATTR_NAME_AIPP_INPUTS = "_aipp_inputs"; const std::string ATTR_NAME_AIPP_OUTPUTS = "_aipp_outputs"; const std::string ATTR_NAME_INPUT_DIMS = "input_dims"; +const std::string ATTR_DYNAMIC_AIPP_INPUT_DIMS = "_dynamic_aipp_input_dims"; +const std::string ATTR_DATA_RELATED_AIPP_MODE 
= "_data_related_aipp_mode"; +const std::string ATTR_DATA_AIPP_DATA_NAME_MAP = "_data_aipp_data_name_map"; const std::string ATTR_NAME_GRAPH_HAS_BEEN_ADDED = "_graph_has_been_added"; + const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id"; const std::string ATTR_NAME_PARENT_GRAPH_NAME = "_parent_graph_name"; diff --git a/src/common/graph/ge_tensor.cc b/src/common/graph/ge_tensor.cc index 196b8569..65881435 100644 --- a/src/common/graph/ge_tensor.cc +++ b/src/common/graph/ge_tensor.cc @@ -431,7 +431,7 @@ graphStatus GeTensorDesc::GetShapeRange(std::vector> return GRAPH_FAILED; } std::pair pair({ele[0], ele[1]}); - range.push_back(pair); + range.emplace_back(pair); } return GRAPH_SUCCESS; diff --git a/src/common/graph/graph.mk b/src/common/graph/graph.mk index 9e9ffa3a..4ea84919 100644 --- a/src/common/graph/graph.mk +++ b/src/common/graph/graph.mk @@ -33,7 +33,6 @@ COMMON_LOCAL_SRC_FILES := \ ./utils/tuning_utils.cc \ ./utils/graph_utils.cc \ ./utils/ge_ir_utils.cc \ - ./utils/node_utils.cc \ ./utils/op_desc_utils.cc \ ./utils/type_utils.cc \ ./utils/tensor_utils.cc \ @@ -44,6 +43,7 @@ COMMON_LOCAL_SRC_FILES := \ option/ge_context.cc \ option/ge_local_context.cc \ ./runtime_inference_context.cc \ + ./utils/node_utils.cc \ COMMON_LOCAL_C_INCLUDES := \ proto/om.proto \ diff --git a/src/common/graph/runtime_inference_context.cc b/src/common/graph/runtime_inference_context.cc index 95068481..361d893c 100644 --- a/src/common/graph/runtime_inference_context.cc +++ b/src/common/graph/runtime_inference_context.cc @@ -15,6 +15,7 @@ */ #include "graph/runtime_inference_context.h" +#include "graph/utils/tensor_adapter.h" #include #include "framework/common/debug/ge_log.h" @@ -67,6 +68,14 @@ graphStatus RuntimeInferenceContext::SetTensor(int64_t node_id, int output_id, T GELOGD("Set tensor for node_id = %ld, output_id = %d", node_id, output_id); output_tensors[output_id] = std::move(tensor); + + auto &output_ge_tensors = ge_tensors_[node_id]; + if (static_cast(output_id) >= output_ge_tensors.size()) { + output_ge_tensors.resize(output_id + 1); + } + + GELOGD("Set ge tensor for node_id = %ld, output_id = %d", node_id, output_id); + output_ge_tensors[output_id] = TensorAdapter::AsGeTensorPtr(tensor); return GRAPH_SUCCESS; } @@ -93,4 +102,28 @@ graphStatus RuntimeInferenceContext::GetTensor(int64_t node_id, int output_id, T tensor = output_tensors[output_id]; return GRAPH_SUCCESS; } + +graphStatus RuntimeInferenceContext::GetTensor(int64_t node_id, int output_id, GeTensorPtr &tensor) { + if (output_id < 0) { + GELOGE(GRAPH_PARAM_INVALID, "Invalid output index: %d", output_id); + return GRAPH_PARAM_INVALID; + } + + std::lock_guard lk(mu_); + auto iter = ge_tensors_.find(node_id); + if (iter == ge_tensors_.end()) { + GELOGE(INTERNAL_ERROR, "Node not register. Id = %ld", node_id); + return INTERNAL_ERROR; + } + + auto &output_tensors = iter->second; + if (static_cast(output_id) >= output_tensors.size()) { + GELOGE(GRAPH_FAILED, "Node output is not registered. node_id = %ld, output index = %d", node_id, output_id); + return GRAPH_FAILED; + } + + GELOGD("Get ge tensor for node_id = %ld, output_id = %d", node_id, output_id); + tensor = output_tensors[output_id]; + return GRAPH_SUCCESS; +} } // namespace ge \ No newline at end of file diff --git a/src/common/graph/utils/node_utils.cc b/src/common/graph/utils/node_utils.cc index 72981d10..684e37ac 100644 --- a/src/common/graph/utils/node_utils.cc +++ b/src/common/graph/utils/node_utils.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#include "utils/node_utils.h" -#include "utils/op_desc_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/op_desc_utils.h" #include "graph/utils/graph_utils.h" #include "debug/ge_op_types.h" #include "debug/ge_util.h" @@ -23,8 +23,13 @@ #include "graph/anchor.h" #include "graph/debug/ge_attr_define.h" #include "graph/types.h" -#include "utils/tensor_utils.h" -#include "utils/type_utils.h" +#include "external/graph/operator.h" +#include "graph/ge_context.h" +#include "graph/runtime_inference_context.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/tensor_adapter.h" +#include "graph/utils/type_utils.h" namespace ge { std::map> NodeUtils::map_send_info_{}; @@ -575,6 +580,58 @@ graphStatus NodeUtils::GetNodeUnknownShapeStatus(const Node &node, bool &is_unkn return GRAPH_SUCCESS; } +graphStatus NodeUtils::GetInputConstData(const ConstNodePtr &node_ptr, const string &dst_name, GeTensorPtr &ge_tensor) { + GE_CHECK_NOTNULL(node_ptr); + return NodeUtils::GetInputConstData(*node_ptr, dst_name, ge_tensor); +} + +graphStatus NodeUtils::GetInputConstData(const Node &node, const string &dst_name, GeTensorPtr &ge_tensor) { + // For inner compute graph + auto op_desc = node.GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + auto index = op_desc->GetInputIndexByName(dst_name); + auto in_data_anchor = node.GetInDataAnchor(index); + GE_CHECK_NOTNULL(in_data_anchor); + auto out_data_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(out_data_anchor); + auto peer_node = out_data_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(peer_node); + auto peer_op_desc = peer_node->GetOpDesc(); + GE_CHECK_NOTNULL(peer_op_desc); + auto peer_op_type = peer_op_desc->GetType(); + if (peer_op_type == CONSTANTOP || peer_op_type == CONSTANT) { + if (!AttrUtils::MutableTensor(peer_node->GetOpDesc(), ATTR_NAME_WEIGHTS, ge_tensor)) { + GELOGW("get attr name %s failed.", ATTR_NAME_WEIGHTS.c_str()); + return GRAPH_FAILED; + } + return GRAPH_SUCCESS; + } else if (peer_op_type == DATA) { + auto parent_node = NodeUtils::GetParentInput(peer_node); + while ((parent_node != nullptr) && (parent_node->GetType() == DATA)) { + parent_node = NodeUtils::GetParentInput(parent_node); + } + if ((parent_node != nullptr) && ((parent_node->GetType() == CONSTANT) || (parent_node->GetType() == CONSTANTOP))) { + if (!AttrUtils::MutableTensor(parent_node->GetOpDesc(), ATTR_NAME_WEIGHTS, ge_tensor)) { + GELOGW("get attr name %s failed.", ATTR_NAME_WEIGHTS.c_str()); + return GRAPH_FAILED; + } + return GRAPH_SUCCESS; + } + } + // Try get from runtime inference context + auto session_id = std::to_string(GetContext().SessionId()); + RuntimeInferenceContext *runtime_infer_ctx = nullptr; + if (RuntimeInferenceContext::GetContext(session_id, &runtime_infer_ctx) == GRAPH_SUCCESS) { + GELOGD("To get constant from runtime inference context. session_id = %s", session_id.c_str()); + auto ret = runtime_infer_ctx->GetTensor(peer_node->GetOpDesc()->GetId(), out_data_anchor->GetIdx(), ge_tensor); + if (ret == GRAPH_SUCCESS) { + return GRAPH_SUCCESS; + } + } + GELOGW("node[%s]'s input[%s]'s peer node is not const", node.GetName().c_str(), dst_name.c_str()); + return GRAPH_FAILED; +} + std::string NodeUtils::GetNodeType(const Node &node) { if (node.GetType() != FRAMEWORKOP) { return node.GetType(); @@ -587,14 +644,6 @@ std::string NodeUtils::GetNodeType(const Node &node) { std::string NodeUtils::GetNodeType(const NodePtr &node) { return node == nullptr ? 
"" : GetNodeType(*node); } -graphStatus NodeUtils::GetInputConstData(const ConstNodePtr &node_ptr, const string &dst_name, GeTensorPtr &ge_tensor) { - return GRAPH_SUCCESS; -} - -graphStatus NodeUtils::GetInputConstData(const Node &node, const string &dst_name, GeTensorPtr &ge_tensor) { - return GRAPH_SUCCESS; -} - ComputeGraphPtr NodeUtils::GetSubgraph(const Node &node, uint32_t index) { auto op_desc = node.GetOpDesc(); if (op_desc == nullptr) { diff --git a/src/ge/CMakeLists.txt b/src/ge/CMakeLists.txt index 8c20b336..db00d8a1 100755 --- a/src/ge/CMakeLists.txt +++ b/src/ge/CMakeLists.txt @@ -51,6 +51,7 @@ include_directories(${GE_SOURCE_DIR}/inc/graph) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) +include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/toolchain) include_directories(${CMAKE_BINARY_DIR}) include_directories(${CMAKE_BINARY_DIR}/proto/ge) @@ -227,6 +228,7 @@ target_link_libraries(ge_runner ${runtime} ${resouce} ${ascend_hal} + ${adump_server} rt dl) @@ -237,6 +239,7 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "common/dump/dump_properties.cc" "common/dump/dump_manager.cc" "common/dump/dump_op.cc" + "common/dump/dump_server.cc" "common/formats/format_transfers/*.cc" "common/formats/formats.cc" "common/formats/utils/formats_trans_utils.cc" diff --git a/src/ge/client/ge_prof.cc b/src/ge/client/ge_prof.cc index d4407852..ad9cc9eb 100644 --- a/src/ge/client/ge_prof.cc +++ b/src/ge/client/ge_prof.cc @@ -29,13 +29,14 @@ using std::vector; namespace { const uint32_t kMaxDeviceNum = 64; -const std::string PROFILING_INIT = "prof_init"; -const std::string PROFILING_FINALIZE = "prof_finalize"; -const std::string PROFILING_START = "prof_start"; -const std::string PROFILING_STOP = "prof_stop"; -const std::string DEVICES_NUMS = "devNums"; -const std::string DEVICE_ID_LIST = "devIdList"; -const std::string AICORE_METRICS = "aicoreMetrics"; +const uint32_t kDeviceListIndex = 3; +const std::string kProfilingInit = "prof_init"; +const std::string kProfilingFinalize = "prof_finalize"; +const std::string kProfilingStart = "prof_start"; +const std::string kProfilingStop = "prof_stop"; +const std::string kDeviceNums = "devNums"; +const std::string kDeviceIdList = "devIdList"; +const std::string kAicoreMetrics = "aicoreMetrics"; const std::map kProfAicoreMetricsToString = { {ge::kAicoreArithmaticThroughput, "AICORE_ARITHMATIC_THROUGHPUT"}, @@ -43,25 +44,7 @@ const std::map kProfAicoreMetricsToStri {ge::kAicoreSynchronization, "AICORE_SYNCHRONIZATION"}, {ge::kAicoreMemory, "AICORE_MEMORY"}, {ge::kAicoreInternalMemory, "AICORE_INTERNAL_MEMORY"}, - {ge::kAicoreStall, "AICORE_STALL"}, - {ge::kAicoreMetricsAll, "AICORE_METRICS_ALL"}}; - -const std::map kDataTypeConfigMapping = {{ge::kProfAcl, PROF_ACL_API}, - {ge::kProfTaskTime, PROF_TASK_TIME}, - {ge::kProfAiCoreMetrics, PROF_AICORE_METRICS}, - {ge::kProfAicpuTrace, PROF_AICPU_TRACE}, - {ge::kProfModelExecute, PROF_MODEL_EXECUTE}, - {ge::kProfRuntimeApi, PROF_RUNTIME_API}, - {ge::kProfRuntimeTrace, PROF_RUNTIME_TRACE}, - {ge::kProfScheduleTimeline, PROF_SCHEDULE_TIMELINE}, - {ge::kProfScheduleTrace, PROF_SCHEDULE_TRACE}, - {ge::kProfAiVectorCoreMetrics, PROF_AIVECTORCORE_METRICS}, - {ge::kProfSubtaskTime, PROF_SUBTASK_TIME}, - {ge::kProfTrainingTrace, PROF_TRAINING_TRACE}, - {ge::kProfHcclTrace, PROF_HCCL_TRACE}, - {ge::kProfDataProcess, PROF_DATA_PROCESS}, - {ge::kProfTaskTrace, 
PROF_TASK_TRACE}, - {ge::kProfModelLoad, PROF_MODEL_LOAD}}; + {ge::kAicoreStall, "AICORE_STALL"}}; } // namespace static bool g_graph_prof_init_ = false; @@ -107,11 +90,11 @@ Status aclgrphProfInit(const char *profiler_path, uint32_t length) { GraphLoader graph_loader; Command command; command.cmd_params.clear(); - command.cmd_type = PROFILING_INIT; - command.module_index = kProfModelLoad | kProfTrainingTrace; + command.cmd_type = kProfilingInit; + command.module_index = PROF_MODEL_LOAD; ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { - GELOGE(ret, "Handle profiling command %s failed, config = %s", PROFILING_INIT.c_str(), profiler_path); + GELOGE(ret, "Handle profiling command %s failed, config = %s", kProfilingInit.c_str(), profiler_path); return ret; } if (!g_graph_prof_init_) { @@ -143,10 +126,10 @@ Status aclgrphProfFinalize() { GraphLoader graph_loader; Command command; command.cmd_params.clear(); - command.cmd_type = PROFILING_FINALIZE; + command.cmd_type = kProfilingFinalize; Status ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { - GELOGE(ret, "Handle profiling command %s failed.", PROFILING_FINALIZE.c_str()); + GELOGE(ret, "Handle profiling command %s failed.", kProfilingFinalize.c_str()); return ret; } @@ -164,9 +147,9 @@ Status aclgrphProfFinalize() { bool TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector &prof_config_params) { prof_config_params.clear(); - prof_config_params.emplace_back(DEVICES_NUMS); + prof_config_params.emplace_back(kDeviceNums); prof_config_params.emplace_back(std::to_string(profiler_config->config.devNums)); - prof_config_params.emplace_back(DEVICE_ID_LIST); + prof_config_params.emplace_back(kDeviceIdList); std::string devID = ""; if (profiler_config->config.devNums == 0) { GELOGW("The device num is invalid."); @@ -180,7 +163,7 @@ bool TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector(profiler_config->config.aicoreMetrics)); if (iter == kProfAicoreMetricsToString.end()) { @@ -250,13 +233,7 @@ aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t dev } config->config.aicoreMetrics = static_cast(aicore_metrics); - uint64_t data_type = 0; - for (auto &iter : kDataTypeConfigMapping) { - if ((iter.first & data_type_config) == iter.first) { - data_type |= iter.second; - } - } - config->config.dataTypeConfig = data_type; + config->config.dataTypeConfig = data_type_config; GELOGI("Successfully create prof config."); return config; } @@ -309,9 +286,11 @@ Status aclgrphProfStart(aclgrphProfConfig *profiler_config) { GraphLoader graph_loader; Command command; command.cmd_params.clear(); - command.cmd_type = PROFILING_START; + command.cmd_type = kProfilingStart; command.cmd_params = prof_params; command.module_index = profiler_config->config.dataTypeConfig; + GELOGI("Profiling will start, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), + prof_params[kDeviceListIndex].c_str(), command.module_index); ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { GELOGE(ret, "Handle profiling command failed"); @@ -360,9 +339,11 @@ Status aclgrphProfStop(aclgrphProfConfig *profiler_config) { GraphLoader graph_loader; Command command; command.cmd_params.clear(); - command.cmd_type = PROFILING_STOP; + command.cmd_type = kProfilingStop; command.cmd_params = prof_params; command.module_index = profiler_config->config.dataTypeConfig; + GELOGI("Profiling will stop, device nums:%s , deviceID:[%s], data type config: 0x%llx", 
prof_params[0].c_str(), + prof_params[kDeviceListIndex].c_str(), command.module_index); ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { GELOGE(ret, "Handle profiling command failed"); diff --git a/src/ge/common/dump/dump_server.cc b/src/ge/common/dump/dump_server.cc new file mode 100644 index 00000000..1f95dc3a --- /dev/null +++ b/src/ge/common/dump/dump_server.cc @@ -0,0 +1,21 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "adx_datadump_server.h" + +int AdxDataDumpServerUnInit() { return 0; } + +int AdxDataDumpServerInit() { return 0; } diff --git a/src/ge/common/profiling/profiling_manager.cc b/src/ge/common/profiling/profiling_manager.cc index d301f647..d02f7e8f 100644 --- a/src/ge/common/profiling/profiling_manager.cc +++ b/src/ge/common/profiling/profiling_manager.cc @@ -55,19 +55,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In bool convert_2_phy_device_id) { #ifdef DAVINCI_SUPPORT_PROFILING vector().swap(device_id_); - // profiling need phy device id - if (!convert_2_phy_device_id) { - device_id_.push_back(options.device_id); - } else { - uint32_t phy_device_id = 0; - rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast(options.device_id), &phy_device_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); - return FAILED; - } - device_id_.push_back(phy_device_id); - } - job_id_ = options.job_id; Status ret; @@ -76,6 +63,20 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In ret = InitFromAclCfg(recv_profiling_config_); } else { ret = InitFromOptions(options); + if (ret == SUCCESS && is_load_profiling_) { + // profiling need phy device id + if (!convert_2_phy_device_id) { + device_id_.push_back(options.device_id); + } else { + uint32_t phy_device_id = 0; + rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast(options.device_id), &phy_device_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); + return FAILED; + } + device_id_.push_back(phy_device_id); + } + } } if (ret != SUCCESS) { GELOGE(ret, "Failed to init profiling."); @@ -868,14 +869,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin } GELOGI("Current logic_device_id:%d", logic_device_id); - uint32_t phy_device_id = 0; - rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); - } - GELOGI("Current phy_device_id:%d", phy_device_id); bool execute_model_prof_on = false; - auto iter = std::find(device_id_.begin(), device_id_.end(), phy_device_id); + auto iter = std::find(device_id_.begin(), device_id_.end(), logic_device_id); if (iter != device_id_.end()) { execute_model_prof_on = true; } diff --git a/src/ge/common/util.cc 
b/src/ge/common/util.cc index cbd2ee71..ce5aa57e 100644 --- a/src/ge/common/util.cc +++ b/src/ge/common/util.cc @@ -58,7 +58,7 @@ const int kWarningThreshold = 536870912 * 2; // 536870912 represent 512M const int kMaxFileSizeLimit = INT_MAX; const int kMaxBuffSize = 256; const char *const kPathValidReason = "The path can only contain 'a-z' 'A-Z' '0-9' '-' '.' '_' and chinese character"; -constexpr uint32_t MAX_CONFIG_FILE_BYTE = 10 * 1024 * 1024; +constexpr uint32_t kMaxConfigFileByte = 10 * 1024 * 1024; } // namespace namespace ge { @@ -512,9 +512,9 @@ FMK_FUNC_HOST_VISIBILITY bool IsValidFile(const char *file_path) { stat.st_mode); return false; } - if (stat.st_size > MAX_CONFIG_FILE_BYTE) { + if (stat.st_size > kMaxConfigFileByte) { GELOGE(PARAM_INVALID, "config file %s size[%ld] is larger than max config file Bytes[%u]", - resolved_file_path.c_str(), stat.st_size, MAX_CONFIG_FILE_BYTE); + resolved_file_path.c_str(), stat.st_size, kMaxConfigFileByte); return false; } return true; diff --git a/src/ge/executor/ge_executor.cc b/src/ge/executor/ge_executor.cc index bf1e250b..0a247142 100644 --- a/src/ge/executor/ge_executor.cc +++ b/src/ge/executor/ge_executor.cc @@ -745,6 +745,22 @@ Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo GELOGI("GetAIPPInfo succ."); return SUCCESS; } + +Status GeExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { + GELOGI("Begin to get aipp type."); + if (!isInit_) { + GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + return GE_EXEC_NOT_INIT; + } + Status ret = GraphExecutor::GetAippType(model_id, index, type, aipp_index); + if (ret != SUCCESS) { + GELOGW("Get aipp type is not success."); + return ret; + } + GELOGI("Get aipp type success."); + return SUCCESS; +} + Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info) { GELOGI("Begin to get dynamic batch output shape info"); if (!isInit_) { diff --git a/src/ge/ge_inference.mk b/src/ge/ge_inference.mk index f83e590a..232e79ec 100644 --- a/src/ge/ge_inference.mk +++ b/src/ge/ge_inference.mk @@ -29,6 +29,7 @@ COMMON_LOCAL_SRC_FILES := \ common/dump/dump_properties.cc \ common/dump/dump_manager.cc \ common/dump/dump_op.cc \ + common/dump/dump_server.cc \ common/helper/model_cache_helper.cc \ ge_local_engine/engine/host_cpu_engine.cc \ @@ -371,7 +372,6 @@ LOCAL_SRC_FILES += $(BUILER_SRC_FILES) LOCAL_SRC_FILES += $(ANALYZER_SRC_FILES) LOCAL_STATIC_LIBRARIES := libge_memory \ - libadump_server_stub \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ @@ -436,7 +436,6 @@ LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES) LOCAL_C_INCLUDES += $(ANALYZER_LOCAL_INCLUDES) LOCAL_STATIC_LIBRARIES := libge_memory \ - libadump_server_stub \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ diff --git a/src/ge/ge_runner.mk b/src/ge/ge_runner.mk index 7a65787c..04182070 100644 --- a/src/ge/ge_runner.mk +++ b/src/ge/ge_runner.mk @@ -1,5 +1,5 @@ LOCAL_PATH := $(call my-dir) - +include $(LOCAL_PATH)/stub/Makefile LIBGE_LOCAL_SRC_FILES := \ proto/fusion_model.proto \ proto/optimizer_priority.proto \ @@ -392,8 +392,8 @@ endif LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) -LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc -LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_prof.cc +LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc \ + ../../out/ge/lib64/stub/ge_prof.cc \ LOCAL_SHARED_LIBRARIES := diff --git a/src/ge/graph/build/memory/block_mem_assigner.cc b/src/ge/graph/build/memory/block_mem_assigner.cc index 746f73c2..773eac6a 100644 --- 
a/src/ge/graph/build/memory/block_mem_assigner.cc +++ b/src/ge/graph/build/memory/block_mem_assigner.cc @@ -413,7 +413,8 @@ BlockMemAssigner::BlockMemAssigner(ComputeGraphPtr compute_graph, const map &reusable_block_counts, const Me } bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, - uint32_t &peer_input_index) { + uint32_t &peer_input_index, bool &no_need_assign_memory) { if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { return false; } @@ -571,6 +572,11 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou // If GetBool fail, is_input_continuous is false. (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + + GE_IF_BOOL_EXEC(is_input_continuous && CheckIsZeroMemNodeType(peer_node->GetType()), + GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index); + no_need_assign_memory = true; return false;); + if (is_input_continuous) { if (n->GetOwnerComputeGraph() != nullptr) { string graph_name = n->GetOwnerComputeGraph()->GetName(); @@ -828,6 +834,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, } } memory_blocks_.emplace_back(block); + blocks_store_.emplace_back(block); return block; } @@ -1143,8 +1150,10 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector bool out_node_set_continuous_input = false; bool no_need_assign_memory = ((size == 0) || CheckIsZeroMemNodeType(node->GetType())); if (!no_need_assign_memory) { - out_node_set_continuous_input = IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index); - no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input); + out_node_set_continuous_input = + IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index, no_need_assign_memory); + GE_IF_BOOL_EXEC(!no_need_assign_memory, + no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input);); } no_need_assign_memory = (no_need_assign_memory || IsKnownSubgraphData(node)); if (no_need_assign_memory) { diff --git a/src/ge/graph/build/memory/block_mem_assigner.h b/src/ge/graph/build/memory/block_mem_assigner.h index 7e37fe8e..6137911c 100644 --- a/src/ge/graph/build/memory/block_mem_assigner.h +++ b/src/ge/graph/build/memory/block_mem_assigner.h @@ -259,6 +259,7 @@ class BlockMemAssigner : public MemAssigner { ge::ComputeGraphPtr compute_graph_; std::vector memory_blocks_; + std::vector blocks_store_; std::vector zero_memory_list_; @@ -357,7 +358,7 @@ class BlockMemAssigner : public MemAssigner { bool IsZeroCopyBlock(const NodePtr &node, bool continuous); bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, - uint32_t &peer_input_index); + uint32_t &peer_input_index, bool &no_need_assign_memory); /// /// @ingroup GE diff --git a/src/ge/graph/build/memory/graph_mem_assigner.cc b/src/ge/graph/build/memory/graph_mem_assigner.cc index 583f65d8..1518714f 100644 --- a/src/ge/graph/build/memory/graph_mem_assigner.cc +++ b/src/ge/graph/build/memory/graph_mem_assigner.cc @@ -39,6 +39,33 @@ const size_t kVirtualInputNodeOutputSize = 1; const size_t kVirtualOutputNodeInputSize = 1; const size_t kVirtualNodeDataIndex = 0; const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_"; +int64_t GetSymbolOutputOffset(const std::map &anchor_to_symbol, + const std::map> &symbol_to_anchors, + const ge::NodePtr &node, const uint32_t i) { + 
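+  // Resolve this node's i-th output anchor to its ref symbol, then return the
+  // output offset recorded on the anchor the symbol is named after; any failed
+  // lookup falls back to ge::kInvalidOffset.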
ge::NodeIndexIO cur_node_index_io(node, i, ge::kOut); + auto iter1 = anchor_to_symbol.find(cur_node_index_io.ToString()); + if (iter1 == anchor_to_symbol.end()) { + return ge::kInvalidOffset; + } + auto out_symbol = iter1->second; + auto iter2 = symbol_to_anchors.find(out_symbol); + if (iter2 == symbol_to_anchors.end()) { + return ge::kInvalidOffset; + } + for (const auto &node_index_io : iter2->second) { + if (node_index_io.value_ == out_symbol) { + vector output_list = node->GetOpDesc()->GetOutputOffset(); + vector symbol_output_list = node_index_io.node_->GetOpDesc()->GetOutputOffset(); + if (node_index_io.index_ >= symbol_output_list.size()) { + return ge::kInvalidOffset; + } + GELOGD("Node %s %uth output offset is %ld, Symbol %s output offset is %ld.", node->GetName().c_str(), i, + output_list[i], iter2->first.c_str(), symbol_output_list.at(node_index_io.index_)); + return symbol_output_list.at(node_index_io.index_); + } + } + return ge::kInvalidOffset; +} } // namespace namespace ge { Status VariableMemoryAssigner::Assign() { @@ -1191,6 +1218,12 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt } Status GraphMemoryAssigner::CheckOffset() { + std::map anchor_to_symbol; + std::map> symbol_to_anchors; + if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Get ref-mapping for graph %s failed.", compute_graph_->GetName().c_str()); + return FAILED; + } for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { GE_CHECK_NOTNULL(node->GetOpDesc()); vector input_list = node->GetOpDesc()->GetInputOffset(); @@ -1200,13 +1233,26 @@ Status GraphMemoryAssigner::CheckOffset() { return FAILED; } } + + bool need_update_output = false; vector output_list = node->GetOpDesc()->GetOutputOffset(); - for (auto output : output_list) { - if (output == ge::kInvalidOffset) { + for (uint32_t i = 0; i < output_list.size(); ++i) { + if (output_list[i] == ge::kInvalidOffset) { GELOGE(FAILED, "Invalid offset in node: %s output: %ld.", node->GetName().c_str(), ge::kInvalidOffset); return FAILED; } + if (node->GetType() == IDENTITY || node->GetType() == READVARIABLEOP) { + auto symbol_offset = GetSymbolOutputOffset(anchor_to_symbol, symbol_to_anchors, node, i); + if (symbol_offset != ge::kInvalidOffset && output_list[i] != symbol_offset) { + output_list[i] = symbol_offset; + need_update_output = true; + } + } } + if (need_update_output) { + node->GetOpDesc()->SetOutputOffset(output_list); + } + vector workspace_list = node->GetOpDesc()->GetWorkspace(); for (auto workspace : workspace_list) { if (workspace == ge::kInvalidOffset) { diff --git a/src/ge/graph/execute/graph_execute.cc b/src/ge/graph/execute/graph_execute.cc index 25208aa4..e1322180 100644 --- a/src/ge/graph/execute/graph_execute.cc +++ b/src/ge/graph/execute/graph_execute.cc @@ -592,7 +592,17 @@ Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigI GELOGW("GetAIPPInfo is not success."); return ret; } + return SUCCESS; +} +Status GraphExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { + auto model_manager = ge::ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + Status ret = model_manager->GetAippType(model_id, index, type, aipp_index); + if (ret != SUCCESS) { + GELOGW("Get aipp type is not success."); + return ret; + } return SUCCESS; } diff --git a/src/ge/graph/execute/graph_execute.h b/src/ge/graph/execute/graph_execute.h index 5cf39bae..242103f8 100644 --- 
a/src/ge/graph/execute/graph_execute.h +++ b/src/ge/graph/execute/graph_execute.h @@ -75,6 +75,8 @@ class GraphExecutor { static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + static Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); + /// /// @ingroup ge /// @brief Get dynamic batch_info diff --git a/src/ge/graph/load/new_model_manager/davinci_model.cc b/src/ge/graph/load/new_model_manager/davinci_model.cc index 3c2aaffa..81eb4bc9 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.cc +++ b/src/ge/graph/load/new_model_manager/davinci_model.cc @@ -125,7 +125,7 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptrHasAttr(ATTR_DATA_RELATED_AIPP_MODE)) { + GELOGW("There is no aipp releated info with index %u.", index); + return SUCCESS; + } + std::string data_mode; + (void)AttrUtils::GetStr(data_op, ATTR_DATA_RELATED_AIPP_MODE, data_mode); + if (data_mode == "static_aipp") { + type = DATA_WITH_STATIC_AIPP; + } else if (data_mode == "dynamic_aipp") { + type = DATA_WITH_DYNAMIC_AIPP; + } else if (data_mode == "dynamic_aipp_conf") { + type = DYNAMIC_AIPP_NODE; + } else { + GELOGE(INTERNAL_ERROR, "The info of aipp releated info %s is invalid with index %u.", data_mode.c_str(), index); + return INTERNAL_ERROR; + } + + if (type == DATA_WITH_DYNAMIC_AIPP) { + string releated_name; + (void)AttrUtils::GetStr(data_op, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name); + for (size_t i = 0; i < data_op_list_.size(); ++i) { + GE_CHECK_NOTNULL(data_op_list_[i]); + if (data_op_list_[i]->GetName() == releated_name) { + GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), i, index); + aipp_index = i; + } + } + if (aipp_index == 0xFFFFFFFF) { + GELOGE(INTERNAL_ERROR, "Can not find aipp data node from index %u", index); + return INTERNAL_ERROR; + } + } + return SUCCESS; +} + void DavinciModel::SetDynamicSize(const std::vector &batch_num, int32_t dynamic_type) { batch_size_.clear(); if (batch_num.empty()) { @@ -1666,9 +1708,9 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, return; } // judge if this data is linked dynamic aipp first, multiply batch has been considered - if (op_desc->HasAttr("_dynamic_aipp_input_dims")) { + if (op_desc->HasAttr(ATTR_DYNAMIC_AIPP_INPUT_DIMS)) { vector dynamic_aipp_input_dims; - (void)AttrUtils::GetListInt(op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_input_dims); + (void)AttrUtils::GetListInt(op_desc, ATTR_DYNAMIC_AIPP_INPUT_DIMS, dynamic_aipp_input_dims); SetInputDimsInfo(dynamic_aipp_input_dims, format, input); return; } else { @@ -3371,11 +3413,15 @@ bool DavinciModel::IsBroadCastOpData(const ge::NodePtr &var_node) { /// @return Status /// Status DavinciModel::InitModelStream(rtStream_t stream) { + ExecuteMode curr_mode = is_async_mode_ ? ASYNCHRONIZATION : SYNCHRONIZATION; + GE_CHK_BOOL_RET_STATUS((curr_mode == last_execute_mode_) || (last_execute_mode_ == INITIALIZATION), INTERNAL_ERROR, + "NnExecute not support mix execute."); + last_execute_mode_ = curr_mode; + // asynchronize mode, use user input stream. 
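A minimal caller-side sketch for the new GeExecutor::GetAippType interface shown above, assuming an initialized executor and an already loaded model; the wrapper name and indices are hypothetical.

```cpp
#include <cstddef>
#include <cstdint>
#include "framework/executor/ge_executor.h"  // assumed include path

ge::Status QueryAipp(ge::GeExecutor &executor, uint32_t model_id, uint32_t input_index) {
  ge::InputAippType type = ge::DATA_WITHOUT_AIPP;
  size_t aipp_data_index = 0;
  ge::Status ret = executor.GetAippType(model_id, input_index, type, aipp_data_index);
  if (ret != ge::SUCCESS) {
    return ret;
  }
  if (type == ge::DATA_WITH_DYNAMIC_AIPP) {
    // aipp_data_index identifies the AippData input that carries the dynamic
    // AIPP parameters for this data input.
  }
  return ge::SUCCESS;
}
```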
if (is_async_mode_) { rt_model_stream_ = stream; is_inner_model_stream_ = false; - last_execute_mode_ = true; return SUCCESS; } @@ -3387,14 +3433,12 @@ Status DavinciModel::InitModelStream(rtStream_t stream) { rt_model_stream_ = stream; is_inner_model_stream_ = false; - last_execute_mode_ = false; return SUCCESS; } - if (last_execute_mode_ || (rt_model_stream_ == nullptr)) { + if (rt_model_stream_ == nullptr) { GE_CHK_RT_RET(rtStreamCreateWithFlags(&rt_model_stream_, priority_, RT_STREAM_FORBIDDEN_DEFAULT)); is_inner_model_stream_ = true; - last_execute_mode_ = false; } return SUCCESS; diff --git a/src/ge/graph/load/new_model_manager/davinci_model.h b/src/ge/graph/load/new_model_manager/davinci_model.h index 15f4539f..438fe639 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.h +++ b/src/ge/graph/load/new_model_manager/davinci_model.h @@ -75,6 +75,12 @@ struct timeInfo { int64_t dumpEndTime; }; +enum ExecuteMode { + INITIALIZATION, + SYNCHRONIZATION, + ASYNCHRONIZATION, +}; + // comments class DavinciModel { public: @@ -314,6 +320,8 @@ class DavinciModel { /// Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info); + Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index); + /// /// @ingroup ge /// @brief Get model_id. @@ -884,7 +892,7 @@ class DavinciModel { bool is_inner_model_stream_; bool is_async_mode_; // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_. - bool last_execute_mode_; + ExecuteMode last_execute_mode_; bool is_stream_list_bind_{false}; bool is_pure_head_stream_{false}; diff --git a/src/ge/graph/load/new_model_manager/model_manager.cc b/src/ge/graph/load/new_model_manager/model_manager.cc index 320bfb16..f6995052 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.cc +++ b/src/ge/graph/load/new_model_manager/model_manager.cc @@ -876,6 +876,14 @@ Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippCo return davinci_model->GetAIPPInfo(index, aipp_info); } +Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { + std::shared_ptr davinci_model = GetModel(model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetAIPPInfo failed, invalid model_id is %u.", + model_id); + + return davinci_model->GetAippType(index, type, aipp_index); +} + Status ModelManager::GenSessionId(uint64_t &session_id) { std::lock_guard lock(session_id_create_mutex_); diff --git a/src/ge/graph/load/new_model_manager/model_manager.h b/src/ge/graph/load/new_model_manager/model_manager.h index e89bfc36..3dce3807 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.h +++ b/src/ge/graph/load/new_model_manager/model_manager.h @@ -224,6 +224,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); + /// /// @ingroup domi_ome /// @brief set model input and output size zero copy diff --git a/src/ge/graph/manager/graph_manager.cc b/src/ge/graph/manager/graph_manager.cc index 82108653..39bdee36 100644 --- a/src/ge/graph/manager/graph_manager.cc +++ b/src/ge/graph/manager/graph_manager.cc @@ -2795,11 +2795,18 @@ Status GraphManager::SaveVariables(const Graph &graph, const std::vectorfirst; + auto var_tensor = iter->second.GetTensorDesc(); + var_tensor.SetName(var_name); + iter->second.SetTensorDesc(var_tensor); 
var_values.emplace_back(iter->second); } } diff --git a/src/ge/graph/optimize/mem_rw_conflict_optimize.cc b/src/ge/graph/optimize/mem_rw_conflict_optimize.cc index 9c166f4d..3c3419ae 100644 --- a/src/ge/graph/optimize/mem_rw_conflict_optimize.cc +++ b/src/ge/graph/optimize/mem_rw_conflict_optimize.cc @@ -491,7 +491,7 @@ Status SplitIdentityAlongAnchor(const OutDataAnchorPtr &out_data_anchor, const I if (input_rw_type == InputRWType::kScopeWriteable || input_rw_type == InputRWType::kWriteable) { auto new_identity = CreateIdentityAfterSrcNode(*pre_node, pre_out_data_anchor->GetIdx()); GE_CHECK_NOTNULL(new_identity); - if (GraphUtils::AddEdge(pre_out_data_anchor, new_identity->GetInDataAnchor(kIdentityAnchorIndex)) != SUCCESS && + if (GraphUtils::AddEdge(pre_out_data_anchor, new_identity->GetInDataAnchor(kIdentityAnchorIndex)) != SUCCESS || GraphUtils::AddEdge(new_identity->GetOutDataAnchor(kIdentityAnchorIndex), peer_in_data_anchor) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to insert Identity between node %s and %s", pre_out_data_anchor->GetOwnerNode()->GetName().c_str(), diff --git a/src/ge/graph/passes/subgraph_pass.cc b/src/ge/graph/passes/subgraph_pass.cc index fbf444fb..fd71e65b 100644 --- a/src/ge/graph/passes/subgraph_pass.cc +++ b/src/ge/graph/passes/subgraph_pass.cc @@ -176,6 +176,9 @@ Status SubgraphPass::WhileInputNodes(const ComputeGraphPtr &graph, const NodePtr GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); NodePtr in_node = peer_out_anchor->GetOwnerNode(); GE_CHECK_NOTNULL(in_node); + if (in_node->GetType() == VARIABLE || in_node->GetType() == VARHANDLEOP || in_node->GetType() == VARIABLEV2) { + continue; + } // Input->While and Input link to other nodes need insert memcpy if (peer_out_anchor->GetPeerInDataAnchors().size() > 1) { GELOGD("Input %s of While %s links to other nodes.", in_node->GetName().c_str(), node->GetName().c_str()); diff --git a/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc index eb936282..545fe66f 100644 --- a/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -124,7 +124,14 @@ Status GetDataDimN(const ge::NodePtr &data_node, ge::Format format, int64_t &bat return PARAM_INVALID; } } - GELOGE(PARAM_INVALID, "when dynamic aipp, shape must be in range [3, 4], but is %zu", shape.size()); + string errormsg = + "its shape size must be in range[3,4] which dynamic aipp is linked, " + "maybe this input is not suitable for dynamic aipp"; + ErrorManager::GetInstance().ATCReportErrMessage( + "E10001", {"parameter", "value", "reason"}, + {data_node->GetName() + " shape size", to_string(shape.size()), errormsg}); + GELOGE(PARAM_INVALID, "The shape size of this node [%s] which linked dynamic aipp must be in range[3, 4], but is %zu", + data_node->GetName().c_str(), shape.size()); return PARAM_INVALID; } @@ -272,7 +279,6 @@ Status AippOp::AddAippAttrbutes(const OpDescPtr &op_desc, const std::string &aip GE_CHK_BOOL_RET_STATUS(AttrUtils::SetInt(op_desc, kCurrentAippIndex, index), INTERNAL_ERROR, "Set kCurrentAippIndex attr for aipp node failed"); - // add input/output desc GeTensorDesc tensor; GE_CHK_GRAPH_STATUS_RET(op_desc->AddInputDesc("images", tensor), "Failed to add input images for aipp node"); @@ -318,6 +324,7 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr GELOGE(PARAM_INVALID, "Get target input node for rank %d failed", rank); return PARAM_INVALID; } + data_node_linked_aipp = data_node; auto data_opdesc = 
data_node->GetOpDesc(); GE_CHECK_NOTNULL(data_opdesc); string set_dt_str; @@ -330,10 +337,17 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr return PARAM_INVALID; } + // add dynamic or static attr memsage to data + if (GetAippMode() == domi::AippOpParams::static_) { + (void)AttrUtils::SetStr(data_opdesc, ATTR_DATA_RELATED_AIPP_MODE, "static_aipp"); + } else if (GetAippMode() == domi::AippOpParams::dynamic) { + (void)AttrUtils::SetStr(data_opdesc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp"); + } + // In scenario AIPP+CONV2D+POOLING, keep the aipp info to Data, since AIPP disappear after subgraph optimize GeAttrValue::NAMED_ATTRS aipp_attr; ConvertParamToAttr(aipp_attr); - if (!AttrUtils::SetNamedAttrs(data_node->GetOpDesc(), ATTR_NAME_AIPP, aipp_attr)) { + if (!AttrUtils::SetNamedAttrs(data_opdesc, ATTR_NAME_AIPP, aipp_attr)) { GELOGE(INTERNAL_ERROR, "Set name attrs for Data node failed. id: %d", rank); return INTERNAL_ERROR; } @@ -737,7 +751,7 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { data_shape_n = data_op_desc->MutableInputDesc(0)->GetShape().GetDim(0); } vector dynamic_aipp_linked_data_shape{data_shape_n, kDynamicDim, kDynamicDim, kDynamicDim}; - (void)AttrUtils::SetListInt(data_op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_linked_data_shape); + (void)AttrUtils::SetListInt(data_op_desc, ATTR_DYNAMIC_AIPP_INPUT_DIMS, dynamic_aipp_linked_data_shape); int64_t batch_count = -1; if (GetDataDimN(data_node, ori_data_format, batch_count) != ge::SUCCESS) { @@ -759,7 +773,24 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { return AddNodeToGraph(aipp_node, max_dynamic_aipp_size); } +Status AippOp::AddAttrToAippData(const OpDescPtr &aipp_data_op_desc) { + // Add dynamic aipp config to aipp_data + GeAttrValue::NAMED_ATTRS aipp_attr; + ConvertParamToAttr(aipp_attr); + (void)AttrUtils::SetNamedAttrs(aipp_data_op_desc, ATTR_NAME_AIPP, aipp_attr); + (void)AttrUtils::SetStr(aipp_data_op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp_conf"); + + // add node name attr to data linked aipp_data, it can be queried by acl. + GE_CHECK_NOTNULL(data_node_linked_aipp); + auto data_op_desc = data_node_linked_aipp->GetOpDesc(); + GE_CHECK_NOTNULL(data_op_desc); + (void)AttrUtils::SetStr(data_op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, aipp_data_op_desc->GetName()); + (void)AttrUtils::SetStr(aipp_data_op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, data_op_desc->GetName()); + return SUCCESS; +} + Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size) { + static int index = 0; std::vector input_shape_dim(1, max_dynamic_aipp_size); GeShape input_shape(input_shape_dim); // construct input tensor @@ -767,18 +798,21 @@ Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp TensorUtils::SetReuseInput(input_tensor, false); TensorUtils::SetSize(input_tensor, max_dynamic_aipp_size); - // Only flush subgraph name const ComputeGraphPtr &graph = aipp_node->GetOwnerComputeGraph(); - string node_name = (graph->GetParentGraph() == nullptr) ? 
kDynamicAippData : (graph->GetName() + "_" + node_name); - + string node_name; + if (index == 0) { + node_name = kDynamicAippData; + } else { + node_name = string(kDynamicAippData) + "_" + to_string(index); + } + ++index; // new add aipp_data ops for dynamic aipp param input OpDescPtr op_desc_ptr_data = MakeShared(node_name, AIPPDATA); GE_CHECK_NOTNULL(op_desc_ptr_data); - // Add dynamic aipp config to aipp_data - GeAttrValue::NAMED_ATTRS aipp_attr; - ConvertParamToAttr(aipp_attr); - (void)AttrUtils::SetNamedAttrs(op_desc_ptr_data, ATTR_NAME_AIPP, aipp_attr); + if (AddAttrToAippData(op_desc_ptr_data) != SUCCESS) { + return INTERNAL_ERROR; + } auto stat1 = op_desc_ptr_data->AddInputDesc(input_tensor); diff --git a/src/ge/graph/preprocess/insert_op/ge_aipp_op.h b/src/ge/graph/preprocess/insert_op/ge_aipp_op.h index c98935ee..64c89b62 100644 --- a/src/ge/graph/preprocess/insert_op/ge_aipp_op.h +++ b/src/ge/graph/preprocess/insert_op/ge_aipp_op.h @@ -78,9 +78,11 @@ class AippOp : public InsertOpBase { Status CreateAippData(const NodePtr &aipp); Status AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size); Status AddAippAttrbutes(const OpDescPtr &op_desc, const std::string &aipp_cfg_path, const uint32_t &index); + Status AddAttrToAippData(const OpDescPtr &aipp_data_op_desc); domi::AippOpParams *aipp_params_ = nullptr; ge::NodePtr aipp_node_ = nullptr; + ge::NodePtr data_node_linked_aipp = nullptr; }; } // namespace ge diff --git a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index c55be013..83a16e75 100644 --- a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -22,6 +22,7 @@ #include "common/ge/ge_util.h" #include "common/op/ge_op_utils.h" #include "common/util.h" +#include "common/util/error_manager/error_manager.h" #include "framework/common/debug/ge_log.h" #include "framework/common/debug/log.h" #include "framework/common/ge_inner_error_codes.h" @@ -120,15 +121,15 @@ Status InsertNewOpUtil::CheckPositionNotRepeat() { for (int j = i + 1; j < insert_op_conf_->aipp_op_size(); j++) { const domi::AippOpParams *another_item = insert_op_conf_->mutable_aipp_op(j); - - GE_IF_BOOL_EXEC(item->related_input_rank() != another_item->related_input_rank(), continue;); - - GE_IF_BOOL_EXEC( - item->input_edge_idx_size() == 0 || another_item->input_edge_idx_size() == 0 || - item->input_edge_idx(0) == another_item->input_edge_idx(0), - GELOGE(PARAM_INVALID, - "Can not insert aipp op to the same postion! please check related_input_rank and input_edge_idx."); - return PARAM_INVALID;); + GE_IF_BOOL_EXEC(item->related_input_rank() == another_item->related_input_rank(), + string errormsg = + "Can not insert aipp to the same postion! Please ensure related_input_rank" + " param is different in different aipp config."; + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); + GELOGE(PARAM_INVALID, + "Can not insert aipp op to the same postion! Please ensure related_input_rank param " + "is different in different aipp config."); + return PARAM_INVALID;); } } @@ -162,18 +163,12 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { std::unique_ptr aippParams(new (std::nothrow) domi::AippOpParams()); GE_CHECK_NOTNULL(aippParams); - GE_IF_BOOL_EXEC(aippNodes.size() > 0, GE_CHK_STATUS(GetAippParams(aippParams, aippNodes[0])); - aippMode = (aippMode == domi::AippOpParams::undefined) ? 
aippParams->aipp_mode() : aippMode; - GE_CHK_BOOL_RET_STATUS(aippMode == aippParams->aipp_mode(), PARAM_INVALID, - "The aipp_mode of all aipp_op must be the same");); GE_IF_BOOL_EXEC( aippNodes.size() > 1, for (decltype(aippNodes)::size_type i = 1; i < aippNodes.size(); i++) { std::unique_ptr currAippParam(new (std::nothrow) domi::AippOpParams()); GE_CHECK_NOTNULL(currAippParam); GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i])); - GE_CHK_BOOL_RET_STATUS(aippMode == currAippParam->aipp_mode(), PARAM_INVALID, - "The aipp_mode of all aipp_op must be the same"); if (aippMode == domi::AippOpParams::static_) { GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, "The input_format of all aipp_ops after one Data should be the same"); diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.cc b/src/ge/graph/preprocess/multi_batch_copy_graph.cc index 298e7749..331d9c31 100644 --- a/src/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/src/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -41,6 +41,7 @@ #include "inc/pass_manager.h" #include "graph/common/local_context.h" +using std::map; using std::set; using std::string; using std::vector; @@ -265,27 +266,24 @@ Status MultiBatchGraphCopyer::Init() { } Status MultiBatchGraphCopyer::LabelStatus() { - for (const auto &data : origin_data_nodes_) { - auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); - if (!IsAllDimsPositive(data_shape.GetDims())) { - origin_nodes_status_[data.get()] = kNodeInBatchBranch; - } - } + map> frame_enters; + InitStatus(frame_enters); + bool changed = true; // If anyone of in node is kNodeInBatchBranch, it is also kNodeInBatchBranch while (changed) { changed = false; for (const auto &node : origin_all_nodes_) { - auto iter = origin_nodes_status_.find(node.get()); - if (iter != origin_nodes_status_.end()) { - continue; - } for (auto &in_node : node->GetInAllNodes()) { bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() && origin_nodes_status_[in_node.get()] == kNodeInBatchBranch; if (is_in_batch) { - origin_nodes_status_[node.get()] = kNodeInBatchBranch; - changed = true; + if (origin_nodes_status_.find(node.get()) == origin_nodes_status_.end() || + origin_nodes_status_[node.get()] != kNodeInBatchBranch) { + origin_nodes_status_[node.get()] = kNodeInBatchBranch; + ResetEnterStatus(frame_enters, node); + changed = true; + } break; } } @@ -316,6 +314,45 @@ Status MultiBatchGraphCopyer::LabelStatus() { return SUCCESS; } +void MultiBatchGraphCopyer::InitStatus(map> &frame_enters) { + for (const auto &node : origin_all_nodes_) { + if (node->GetType() != ENTER && node->GetType() != REFENTER) { + continue; + } + auto op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + continue; + } + string frame_name; + if (AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) { + frame_enters[frame_name].emplace_back(node); + } + } + + for (const auto &data : origin_data_nodes_) { + auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); + if (!IsAllDimsPositive(data_shape.GetDims())) { + origin_nodes_status_[data.get()] = kNodeInBatchBranch; + } + } +} + +void MultiBatchGraphCopyer::ResetEnterStatus(map> &frame_enters, const NodePtr &node) { + if (node->GetType() != ENTER && node->GetType() != REFENTER) { + return; + } + + for (const auto &frame_enter : frame_enters) { + auto &enters = frame_enter.second; + if (std::find(enters.begin(), enters.end(), node) != enters.end()) { + for (const 
auto &enter : enters) { + origin_nodes_status_[enter.get()] = kNodeInBatchBranch; + } + break; + } + } +} + Status MultiBatchGraphCopyer::CreateNewNodes() { shape_data_ = InsertShapeDataNode(); if (shape_data_ == nullptr) { diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.h b/src/ge/graph/preprocess/multi_batch_copy_graph.h index 062b98d2..f665b65e 100644 --- a/src/ge/graph/preprocess/multi_batch_copy_graph.h +++ b/src/ge/graph/preprocess/multi_batch_copy_graph.h @@ -68,6 +68,8 @@ class MultiBatchGraphCopyer { // label status for origin_all_nodes_ Status LabelStatus(); + void InitStatus(std::map> &frame_enters); + void ResetEnterStatus(std::map> &frame_enters, const NodePtr &node); // add nodes functions Status CreateNewNodes(); diff --git a/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 44fe377a..871f1db4 100644 --- a/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -722,8 +722,15 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node GE_CHECK_NOTNULL(node_item); auto task_defs = model.GetTaskDefs(node); GE_CHECK_NOTNULL(task_defs); - GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID, "Node[%s] task_def num[%zu] != 1", - node->GetName().c_str(), (*task_defs).size()); + if (node_item->shape_inference_type != DEPEND_COMPUTE) { + GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID, "Node[%s] task_def num[%zu] != 1", + node->GetName().c_str(), (*task_defs).size()); + } else { + // The number of tasks of the fourth type operator may be 2 + GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1 || (*task_defs).size() == 2, PARAM_INVALID, + "Node[%s] DEPEND_COMPUTE task_def num[%zu] != 1 or 2", node->GetName().c_str(), + (*task_defs).size()); + } const auto &task_def = (*task_defs)[0]; std::shared_ptr aicpu_task; if (task_def.type() == RT_MODEL_TASK_KERNEL_EX) { diff --git a/src/ge/session/inner_session.cc b/src/ge/session/inner_session.cc index 3d3adfd8..44c29460 100644 --- a/src/ge/session/inner_session.cc +++ b/src/ge/session/inner_session.cc @@ -18,6 +18,7 @@ #include #include #include +#include "adx_datadump_server.h" #include "common/dump/dump_properties.h" #include "common/util.h" #include "framework/common/debug/ge_log.h" @@ -76,10 +77,12 @@ Status InnerSession::Initialize() { DumpProperties dump_properties; dump_properties.InitByOptions(); + GE_CHK_STATUS_RET(AddDumpProperties(dump_properties), "Add dump properties failed"); ret = graph_manager_.Initialize(options_); if (ret != SUCCESS) { GELOGE(ret, "[InnerSession:%lu] initialize failed.", session_id_); + GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); return ret; } @@ -87,6 +90,7 @@ Status InnerSession::Initialize() { if (ret != SUCCESS) { GELOGE(ret, "failed to set malloc size"); (void)graph_manager_.Finalize(); + GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); return ret; } @@ -97,6 +101,7 @@ Status InnerSession::Initialize() { ret = VarManager::Instance(session_id_)->Init(version, session_id_, DEFAULT_DEVICE_ID, DEFAULT_JOB_ID); if (ret != SUCCESS) { GELOGE(ret, "failed to init session instance"); + GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); } init_flag_ = true; return SUCCESS; @@ -122,6 +127,7 @@ Status InnerSession::Finalize() { (void)VarManager::Instance(session_id_)->FreeVarMemory(); 
GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); + GE_CHK_STATUS_RET(RemoveDumpProperties(), "Remove dump properties failed"); return ret; } @@ -297,4 +303,27 @@ Status InnerSession::SaveVariables(const Graph &graph, const std::vectorfloat16) scenarios. +* Mixed precision is enabled by default. +* \n +* *@par Attributes: -* @li strides: A list of 4 integers. Specifying the strides of the +*@li strides: Required. A list of 4 integers. Specifying the strides of the * convolution along the height and width. The dimension order is determined * by the data format of "x". By default the N and C dimensions are set to 1. -* @li pads: A list of 4 integers. Specifying the top, bottom, left and right -* padding. -* @li dilations: A list of 4 integers. Specifying the dilation rate to use -* for dilated convolution. Has the same dimension order and value as "strides". -* @li groups: Number of blocked connections from input channels to output -* channels. Input channels and output channels must both be divisible by -* "groups".Type is int32. -* @li offset_x: An optional integer for quantized convolution. Type is int32. Defaults to "0". -* @li data_format: An optional string from: "NHWC", "NCHW". Specifying the -* data format of the input and output images. Type is string. Defaults to "NHWC". Reserved . \n - -*@par Outputs: -* @li y: A 4D Tensor of output images . \n - -*@attention -* @li The parameter scope is listed as follows: -* @verbatim - |Name | Field | Scope - ------------------|--------------|---------- - |Input Image Size | H dimension | [1, 4096] - | | W dimension | [1, 4096] - ------------------|--------------|---------- - |Filter Size | H dimension | [1, 255] - | | W dimension | [1, 255] - ------------------|--------------|---------- - |Stride Size | H dimension | [1, 63] - | | W dimension | [1, 63] - ------------------|--------------|---------- - |Padding Size | top side | [0, 255] - | | bottom side | [0, 255] - | | left side | [0, 255] - | | right side | [0, 255] - ------------------|--------------|---------- - |Dilation Size | H dimension | [1, 255] - | W dimension | [1, 255] +*@li pads: Required. A list of 4 integers. Specifying the top, bottom, left +* and right padding. +* @li dilations: Optional. A list of 4 integers. Specifying the dilation rate +* to use for dilated convolution. Has the same dimension order and value as +* "strides". Defaults to [1, 1, 1, 1]. +* @li groups: Optional. An integer of type int32, for the number of blocked +* connections from input channels to output channels. Input channels and output +* channels must both be divisible by "groups". "x" in_channels must be equal to +* "filter" in_channels * groups. Defaults to 1. +* @li offset_x: Optional. An integer of type int32, for quantized convolution. +* Defaults to 0. +* @li data_format: Reserved and optional. A string from: "NHWC" and "NCHW". +* Specifying the data format of the input and output images. Defaults to +* "NHWC". 
+*\n +*\n +* The following value range restrictions must be met: +*@verbatim + |Name | Field | Scope + ------------------|----------|---------- + |Input Image Size | H | [1, 4096] + | | W | [1, 4096] + ------------------|----------|---------- + |Filter Size | H | [1, 255] + | | W | [1, 255] + ------------------|----------|---------- + |Stride | H | [1, 63] + | | W | [1, 63] + ------------------|----------|---------- + |Padding | top | [0, 255] + | | bottom | [0, 255] + | | left | [0, 255] + | | right | [0, 255] + ------------------|----------|---------- + |Dilation | H | [1, 255] + | | W | [1, 255] @endverbatim - -* @li There are restrictions for certain scenarios: -* @verbatim - Output | Restrictions - ------------------|---------------------------------------------- - W dimension == 1 | HxW(input) == HxW(filter) - H dimension == 1 | - ------------------|---------------------------------------------- - W dimension == 1 | Not supported - H dimension != 1 | +* +*@par Outputs: +*@li y: A 4D Tensor of output images. Has the same type and format as "x". With +* "NHWC" format, the shape is [batch, out_height, out_width, out_channels]. +*\n +* out_height = (in_height + top_pad + bottom_pad - +* dilation_h * (filter_height - 1) - 1) +* / stride_h + 1 +*\n +* out_width = (in_width + left_pad + right_pad - +* dilation_w * (filter_width - 1) - 1) +* / stride_w + 1 +* +*@attention Constraints: +*@li The following restrictions on the output must be met: +*@verbatim + | Output | Restrictions + -------------------|--------------------------- + | W dimension == 1 | H*W(input) == H*W(filter) + | H dimension == 1 | + -------------------|--------------------------- + | W dimension == 1 | Not supported + | H dimension != 1 | @endverbatim -* As shown above, "HxW(input)" indicates the image size after padding and -* "HxW(filter)" indicates the filter size after dilation . \n - +* "H * W (input)" indicates the image size after padding and "H * W (filter)" +* indicates the filter size after dilation. +*\n +* *@par Quantization supported or not -* Yes - +*@li Yes +* *@par Third-party framework compatibility *@li Compatible with the TensorFlow operator "conv2d". *@li Compatible with the Caffe operator 2D "Convolution". diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index d9c28087..415cc4ef 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -1035,6 +1035,9 @@ REG_OP(ROIPooling) *@par Outputs: * @ decoded_boxes: A Tensor. Must have the same type as box_predictions. * N-D with shape [N, 4]. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DecodeBbox) .INPUT(box_predictions, TensorType{DT_FLOAT16}) @@ -1052,6 +1055,9 @@ REG_OP(DecodeBbox) *@par Outputs: *boxes_output: A Tensor. Must have the same type as boxes_output. N-D with shape [N, 4]. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ClipBoxes) .INPUT(boxes_input, TensorType({DT_FLOAT16})) @@ -1270,6 +1276,9 @@ REG_OP(RpnProposalPostProcessing) * *@par Outputs: * @ boundary_encoded: A Tensor. Must be float16. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DecodeBoundariesTarget) .INPUT(boundary_predictions, TensorType({DT_FLOAT16})) @@ -1287,6 +1296,9 @@ REG_OP(DecodeBoundariesTarget) * *@par Outputs: * @ keypoints_decoded: A Tensor. Must be float16. 
+ +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DecodeCornerpointsTargetBG) .INPUT(keypoints_prediction, TensorType({DT_FLOAT16})) @@ -1304,6 +1316,9 @@ REG_OP(DecodeCornerpointsTargetBG) * *@par Outputs: * @ keypoints_decoded: A Tensor. Must be float16. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DecodeCornerpointsTargetWrtCenterV1) .INPUT(keypoints_prediction, TensorType({DT_FLOAT16})) @@ -1321,6 +1336,9 @@ REG_OP(DecodeCornerpointsTargetWrtCenterV1) * *@par Outputs: * @ boundary_encoded: A Tensor. Must be float16. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DecodeWheelsTarget) .INPUT(boundary_predictions, TensorType({DT_FLOAT16})) diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 6d4f6f9d..14949c54 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -335,6 +335,8 @@ REG_OP(LogSoftmaxV2) *@par Outputs: * y: A Tensor of the same type as "grad" . \n +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ConfusionSoftmaxGrad) .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -499,6 +501,9 @@ REG_OP(LayerNorm) * @li pd_x: A Tensor. Must be one of the following types: float16, float32. * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. * @li pd_beta: A Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LayerNormGrad) .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -540,6 +545,9 @@ REG_OP(LayerNormGrad) *@par Outputs: *Three outputs, including: * @li pd_x: A Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LayerNormXBackprop) .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -579,6 +587,9 @@ REG_OP(LayerNormXBackprop) *Three outputs, including: * @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. * @li pd_beta: A Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(LayerNormBetaGammaBackprop) .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -811,6 +822,9 @@ instruction . \n *@par Third-party framework compatibility *@li Compatible with the PyTorch operator GroupNorm. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(GroupNorm) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -862,6 +876,9 @@ Specifies the variance of "x" . \n *@par Third-party framework compatibility *@li Compatible with the PyTorch operator InstanceNorm. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(InstanceNormV2) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 4f51a82e..65fb462e 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -2031,6 +2031,9 @@ REG_OP(ApplyAdadeltaD) * Two outputs, including: * @li var: A mutable Tensor has the same type as "var". * @li accum: A mutable Tensor has the same type as "var". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
*/ REG_OP(FusedMulApplyMomentum) .INPUT(var, TensorType::NumberType()) @@ -2079,6 +2082,9 @@ REG_OP(FusedMulApplyMomentum) * @li var: A Tensor has the type float32. * @li var_copy: A Tensor has the type float16. * @li accum: A Tensor has the same type as input "accum". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(FusedMulApplyMomentumExtern) .INPUT(var, TensorType(DT_FLOAT)) @@ -2581,6 +2587,8 @@ REG_OP(SparseApplyAdadeltaD) *@par Attributes: * @li automic_add_mem_size: sizes of workspaces . \n +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(AtomicAddrClean) .ATTR(automic_add_mem_size, ListInt, {}) diff --git a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h index 90628af6..e94dafa7 100644 --- a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h +++ b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h @@ -30,6 +30,9 @@ namespace ge { *@par Outputs: *data: A Tensor of data value. Must be float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUAllocFloatStatusOperator) .OUTPUT(data, TensorType({DT_FLOAT})) @@ -43,6 +46,9 @@ REG_OP(NPUAllocFloatStatusOperator) *@par Outputs: *data: A Tensor of data value. Must be float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUClearFloatStatusOperator) .INPUT(addr, TensorType{DT_FLOAT}) @@ -57,6 +63,9 @@ REG_OP(NPUClearFloatStatusOperator) *@par Outputs: *data: A Tensor of data value. Must be float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUGetFloatStatusOperator) .INPUT(addr, TensorType{DT_FLOAT}) @@ -68,6 +77,9 @@ REG_OP(NPUGetFloatStatusOperator) *@par Outputs: *y: A Tensor of type int32, output eight numbers with a value of zero. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUAllocFloatStatus) .OUTPUT(data, TensorType({DT_FLOAT})) @@ -81,6 +93,9 @@ REG_OP(NPUAllocFloatStatus) *@par Outputs: *data: A Tensor of type float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUClearFloatStatus) .INPUT(addr, TensorType{DT_FLOAT}) @@ -95,6 +110,9 @@ REG_OP(NPUClearFloatStatus) *@par Outputs: *data: A Tensor of type float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(NPUGetFloatStatus) .INPUT(addr, TensorType{DT_FLOAT}) diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index 5938941a..4f42008e 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -186,6 +186,73 @@ REG_OP(PadD) .OP_END_FACTORY_REG(PadD) /** +*@brief Pads a tensor. + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, +* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, uint32, uint64. +* @li paddings: A Tensor of type int32 or int64. +* @li constant_values: A optional Tensor of int32 or int64 + +*@par Attributes: +* @li mode: An optional string, Defaults to "constant", indicates paddings mode, +* support "constant", "reflect", "edge" +* @li paddings_contiguous: An optional bool value, Defaults to true. +* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...] 
+* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...] + +*@par Outputs: +*y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility: +* Compatible with ONNX operator Pad. +*/ +REG_OP(PadV3) + .INPUT(x, TensorType::BasicType()) + .INPUT(paddings, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(constant_values, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .ATTR(mode, String, "constant") + .ATTR(paddings_contiguous, Bool, true) + .OP_END_FACTORY_REG(PadV3) + +/** +*@brief Pads a tensor. + +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32. + +*@par Attributes: +* @li paddings: An required "vector>". +* For each dimension D of input, paddings[D, 0] indicates how many +* values to add before the contents of tensor in that dimension, +* and paddings[D, 1] indicates how many values to add after the +* contents of tensor in that dimension. +* @li constant_values: An optional int value for pad. +* @li mode: An optional string, Defaults to "constant", indicates paddings mode, +* support "constant", "reflect", "edge" +* @li paddings_contiguous: An optional bool value, Defaults to true. +* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...] +* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...] + +*@par Outputs: +*y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility: +* Compatible with ONNX operator Pad. +*/ +REG_OP(PadV3D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) + .REQUIRED_ATTR(paddings, ListListInt) + .ATTR(constant_values, Int, 0) + .ATTR(mode, String, "constant") + .ATTR(paddings_contiguous, Bool, true) + .OP_END_FACTORY_REG(PadV3D) + +/** *@brief Create a diagonal tensor *@par Inputs: @@ -258,6 +325,9 @@ REG_OP(AscendPadding) /** *@brief EmbeddingRankId, traverse the index calculation server and its position in the server . \n +*@par Restrictions: +*Warning:THIS FUNCTION IS DEPRECATED. Please do not use. \n + *@par Inputs: *One input, include: *addr_table: Tensor which last dimension must be 3. For example: [8, 3]. diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index edec232d..b97d824f 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -32,7 +32,7 @@ namespace ge { *@par Inputs: *Inputs include: -* @li logits: A Tensor. Must be one of the following types: float32, float64,double. +* @li logits: A Tensor. Must be one of the following types: float16, float, double. 2-D Tensor with shape [batch_size, num_classes]. * @li num_samples: A Tensor of type int32. 0-D. Number of independent samples to draw for each row slice . \n diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 7a239732..626dda59 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -502,7 +502,7 @@ REG_OP(ReduceMean) *@par Inputs: *One input: -* @li x: A Tensor. Must be one of the following types: float16, float32, int8, uint8 . \n +* @li x: A Tensor. Must be one of the following types: float16, float32 . \n *@par Attributes: *@li axes: The dimensions to reduce. Must be one of the following types: int, list, tuple, NoneType. @@ -521,8 +521,8 @@ REG_OP(ReduceMean) * Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMean instead. 
*/ REG_OP(ReduceMeanD) - .INPUT(x, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT, DT_INT8, DT_UINT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT, DT_INT8, DT_UINT8})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(axes, ListInt) .ATTR(keep_dims, Bool, false) .OP_END_FACTORY_REG(ReduceMeanD) diff --git a/third_party/fwkacllib/inc/ops/resource_variable_ops.h b/third_party/fwkacllib/inc/ops/resource_variable_ops.h index a4d54088..fdc76391 100644 --- a/third_party/fwkacllib/inc/ops/resource_variable_ops.h +++ b/third_party/fwkacllib/inc/ops/resource_variable_ops.h @@ -26,6 +26,21 @@ namespace ge { +/** +*@brief Creates a handle to a Variable resource. \n + +*@par Outputs: +*y:A Tensor of type resource. \n + +*@par Attributes: +* @li container: optional, string. +* @li shared_name: optional, string. +* @li dtype: required, type. +* @li shape: optional, ListInt. \n + +*@see VarHandleOp. +*/ + REG_OP(VarHandleOp) .ATTR(container, String, "") .ATTR(shared_name, String, "") @@ -34,6 +49,19 @@ REG_OP(VarHandleOp) .OUTPUT(y, TensorType({DT_RESOURCE})) .OP_END_FACTORY_REG(VarHandleOp) +/** +*@brief Assigns a new value to a variable. \n + +*@par Inputs: +*resource:Handle to the resource in which to store the variable. +*value:The value to set the new tensor to use. \n + +*@par Attributes: +* @li dtype: required, type. \n + +*@see AssignVariableOp. +*/ + REG_OP(AssignVariableOp) .INPUT(resource, TensorType({DT_RESOURCE})) .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ @@ -41,6 +69,19 @@ REG_OP(AssignVariableOp) .REQUIRED_ATTR(dtype, Type) .OP_END_FACTORY_REG(AssignVariableOp) +/** +*@brief Adds a value to the current value of a variable. \n + +*@par Inputs: +*resource:Handle to the resource in which to store the variable. +*value:The value by which the variable will be incremented. \n + +*@par Attributes: +* @li dtype: required, type. \n + +*@see AssignAddVariableOp. +*/ + REG_OP(AssignAddVariableOp) .INPUT(resource, TensorType({DT_RESOURCE})) .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ @@ -48,6 +89,19 @@ REG_OP(AssignAddVariableOp) .REQUIRED_ATTR(dtype, Type) .OP_END_FACTORY_REG(AssignAddVariableOp) +/** +*@brief Subtracts a value to the current value of a variable. \n + +*@par Inputs: +*resource:Handle to the resource in which to store the variable. +*value:The value by which the variable will be incremented. \n + +*@par Attributes: +* @li dtype: required, type. \n + +*@see AssignSubVariableOp. +*/ + REG_OP(AssignSubVariableOp) .INPUT(resource, TensorType({DT_RESOURCE})) .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, \ diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index 77437aba..e1a83f43 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -81,6 +81,9 @@ REG_OP(BasicLSTMCell) *@par Outputs: *output_h:A Tensor of output. Must be the type float32. The format must be FRACTAL_Z. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(DynamicLSTM) .INPUT(x, TensorType({DT_FLOAT32})) @@ -306,6 +309,9 @@ REG_OP(LSTMInputGrad) *two outputs: *@li dxt:A 4D Tensor. Must be one of the following types: float16, float32. *@li dht:A 4D Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
*/ REG_OP(BasicLSTMCellInputGrad) .INPUT(dgate, TensorType({DT_FLOAT16})) @@ -328,6 +334,9 @@ REG_OP(BasicLSTMCellInputGrad) *two outputs: *@li dw:A 4D Tensor. Must be one of the following types: float16. *@li db:A 4D Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(BasicLSTMCellWeightGrad) .INPUT(x, TensorType({DT_FLOAT16})) @@ -358,6 +367,9 @@ REG_OP(BasicLSTMCellWeightGrad) *two outputs: *@li dgate:A 4D Tensor. Must be one of the following types: float16. *@li dct_1:A 4D Tensor. Must be one of the following types: float16, float32. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(BasicLSTMCellCStateGrad) .INPUT(c, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -439,6 +451,9 @@ REG_OP(RNN) *two outputs: *@li o_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(BasicRNNCell) .INPUT(x, TensorType({DT_FLOAT16})) @@ -460,13 +475,13 @@ REG_OP(BasicRNNCell) *@brief: DynamicGRU calculation. *@par Inputs: *seven inputs: \n -*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. -*@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. -*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. -*@li cw:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. -*@li cb:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. -*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. -*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li w:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li b:Must be one of the following types: float16, float32. The format must be ND. +*@li cw:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li cb:Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:Must be one of the following types: int32. The format must be ND. +*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Attributes: *@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. @@ -480,11 +495,11 @@ REG_OP(BasicRNNCell) *@par Outputs: *five outputs: \n -*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li r:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li n:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. 
+*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li i:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li n:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -495,7 +510,7 @@ REG_OP(DynamicGRU) .INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(cw, TensorType({DT_FLOAT16})) .INPUT(cb, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(seq_length, TensorType({DT_UINT32})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -516,13 +531,13 @@ REG_OP(DynamicGRU) *@brief: DynamicGRUV2 calculation. *@par Inputs: *seven inputs: \n -*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. -*@li weight_input:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. -*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_ZN_LSTM. -*@li bias_input:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. -*@li bias_hidden:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. -*@li seq_length:A 1D Tensor. Must be one of the following types: int32. The format must be ND. -*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. +*@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. +*@li bias_input:Must be one of the following types: float16, float32. The format must be ND. +*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. +*@li seq_length:Must be one of the following types: int32. The format must be ND. +*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Attributes: *@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. @@ -538,12 +553,12 @@ REG_OP(DynamicGRU) *@par Outputs: *six outputs: \n -*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li update:Must be one of the following types: float16, float32. 
The format must be FRACTAL_NZ. +*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -554,7 +569,7 @@ REG_OP(DynamicGRUV2) .INPUT(weight_hidden, TensorType({DT_FLOAT16})) .OPTIONAL_INPUT(bias_input, TensorType({DT_FLOAT16, DT_FLOAT})) .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(seq_length, TensorType({DT_UINT32})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index d17e8e94..613ce358 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -1787,6 +1787,9 @@ REG_OP(TileWithAxis) *@par Outputs: *y: A Tensor of the same type as "x". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ReadSelect) .INPUT(x, TensorType::ALL()) @@ -1802,6 +1805,9 @@ REG_OP(ReadSelect) *@par Outputs: *y: A Tensor. Has the same type as "x". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(WriteSelect) .INPUT(x, TensorType::ALL()) diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 5414f122..edc55820 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -625,6 +625,9 @@ REG_OP(ConfusionTransposeD) *@par Outputs: *y: A Tensor. Has the same type as "x". + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(ConfusionTranspose) .INPUT(x, TensorType::BasicType()) diff --git a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h index 2f014937..c96b96be 100644 --- a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h +++ b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h @@ -28,6 +28,9 @@ namespace ge { /** *@brief Applies a perspective transformation to an image . \n +*@par Restrictions: +*Warning:THIS FUNCTION IS DEPRECATED. Please do not use. \n + *@par Inputs: *@li x: input tensor, format NCHW, type must be float. *@li matrix: transformation matrix, format ND , shape must be (N, 9), type must be float . 
\n diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index 8e159dd7..8c1a4326 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -159,7 +159,12 @@ typedef struct rtAiCoreMemorySize { * @ingroup dvrt_mem * @brief memory type */ -typedef enum tagRtMemoryType { RT_MEMORY_TYPE_HOST = 1, RT_MEMORY_TYPE_DEVICE = 2 } rtMemoryType_t; +typedef enum tagRtMemoryType { + RT_MEMORY_TYPE_HOST = 1, + RT_MEMORY_TYPE_DEVICE = 2 , + RT_MEMORY_TYPE_SVM = 3, + RT_MEMORY_TYPE_DVPP = 4 +} rtMemoryType_t; /** * @ingroup dvrt_mem @@ -167,8 +172,8 @@ typedef enum tagRtMemoryType { RT_MEMORY_TYPE_HOST = 1, RT_MEMORY_TYPE_DEVICE = */ typedef struct tagRtPointerAttributes { rtMemoryType_t memoryType; // host memory or device memory + rtMemoryType_t locationType; uint32_t deviceID; // device ID - uint32_t isManaged; uint32_t pageSize; } rtPointerAttributes_t; diff --git a/third_party/fwkacllib/inc/tdt/status.h b/third_party/fwkacllib/inc/tdt/status.h index 87ae8f75..185d2b9c 100644 --- a/third_party/fwkacllib/inc/tdt/status.h +++ b/third_party/fwkacllib/inc/tdt/status.h @@ -100,6 +100,8 @@ enum { TDT_TSD_SEND_HEARTBEAT_FAILED_CODE, TDT_TSD_CLEAN_RESOURCE_FAILED_CODE, TDT_TSD_SEND_MSG_FAILED_CODE, + TDT_TSD_AICPU_SD_PROCESS_ABNORMAL_CODE, + TDT_TSD_CUSTOM_PROCESS_ABNORMAL_CODE, TDT_PPC_DRIVER_INIT_FAIL_CODE, TDT_PPC_SERVER_CLIENT_CREATE_FAIL_CODE, TDT_PPC_SERVER_CLIENT_DESTORY_FAIL_CODE, @@ -510,6 +512,8 @@ TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_INIT_HDCSERVER_FAILED, " TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_SEND_HEARTBEAT_FAILED, "Tsdaemon get pid fail"); TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_CLEAN_RESOURCE_FAILED, "Tsdaemon clean resource fail"); TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_SEND_MSG_FAILED, "Tsdaemon send msg fail"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_ERROR, TDT_TSD_AICPU_SD_PROCESS_ABNORMAL, "aicpu_sd process abnormal"); +TDT_DEF_ERROR_CODE(MODID_TSD_SERVER, TDT_INFO, TDT_TSD_CUSTOM_PROCESS_ABNORMAL, "custom_aicpu_sd process abnormal"); /********************* PPC ****************************/ // create PPC error level error diff --git a/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h new file mode 100644 index 00000000..a1c39a51 --- /dev/null +++ b/third_party/fwkacllib/inc/toolchain/adx_datadump_server.h @@ -0,0 +1,36 @@ +/** +* @file adx_datadump_server.h +* +* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +#ifndef ADX_DATADUMP_SERVER_H +#define ADX_DATADUMP_SERVER_H +#ifdef __cplusplus +extern "C" { +#endif +/** + * @brief initialize server for normal datadump function. + * @return + * IDE_DAEMON_OK: datadump server init success + * IDE_DAEMON_ERROR: datadump server init failed + */ +int AdxDataDumpServerInit(); + +/** + * @brief uninitialize server for normal datadump function. + * @return + * IDE_DAEMON_OK: datadump server uninit success + * IDE_DAEMON_ERROR: datadump server uninit failed + */ +int AdxDataDumpServerUnInit(); + +#ifdef __cplusplus +} +#endif +#endif +
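The new adx_datadump_server.h above declares only the two entry points AdxDataDumpServerInit() and AdxDataDumpServerUnInit(). A minimal usage sketch follows; it assumes the conventional zero-return-on-success behaviour suggested by the IDE_DAEMON_OK / IDE_DAEMON_ERROR comments (the header does not pin down the numeric values), and the surrounding function is purely illustrative.

// Illustrative pairing of the datadump server lifecycle around a work section,
// in the spirit of how this patch adds and removes dump properties in InnerSession.
// Assumption: a return value of 0 means success (IDE_DAEMON_OK).
#include <cstdio>
#include "adx_datadump_server.h"

int RunWithDataDumpServer() {
  if (AdxDataDumpServerInit() != 0) {
    std::fprintf(stderr, "datadump server init failed\n");
    return -1;
  }
  // ... load and execute models with dumping enabled ...
  if (AdxDataDumpServerUnInit() != 0) {
    std::fprintf(stderr, "datadump server uninit failed\n");
    return -1;
  }
  return 0;
}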
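The LabelStatus rework in multi_batch_copy_graph.cc groups Enter/RefEnter nodes by ENTER_ATTR_FRAME_NAME and, whenever a node is pulled into the batch branch, pulls every Enter of the same frame in with it (ResetEnterStatus). The self-contained sketch below illustrates that fixed-point propagation with simplified stand-in types; it is not the real ge::Node API.

// Simplified illustration of the Enter-aware propagation added to
// MultiBatchGraphCopyer::LabelStatus / ResetEnterStatus.
#include <map>
#include <set>
#include <string>
#include <vector>

struct MiniNode {
  std::string name;
  std::string frame;                  // non-empty only for Enter/RefEnter nodes
  std::vector<MiniNode *> inputs;
};

void PropagateBatchBranch(const std::vector<MiniNode *> &all_nodes,
                          const std::map<std::string, std::vector<MiniNode *>> &frame_enters,
                          std::set<MiniNode *> &in_batch_branch) {
  bool changed = true;
  while (changed) {
    changed = false;
    for (auto *node : all_nodes) {
      for (auto *in : node->inputs) {
        if (in_batch_branch.count(in) == 0 || in_batch_branch.count(node) != 0) {
          continue;
        }
        in_batch_branch.insert(node);
        changed = true;
        if (!node->frame.empty()) {   // mirror ResetEnterStatus for Enter nodes
          auto it = frame_enters.find(node->frame);
          if (it != frame_enters.end()) {
            for (auto *enter : it->second) {
              in_batch_branch.insert(enter);
            }
          }
        }
        break;
      }
    }
  }
}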
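The reworked Conv2D comment spells out the out_height / out_width formulas. A stand-alone helper that evaluates exactly those formulas makes the arithmetic concrete; the function name and the worked example are illustrative only and are not part of the operator prototype.

// Output spatial size per the documented formula:
//   out = (in + pad_begin + pad_end - dilation * (filter - 1) - 1) / stride + 1
// Integer (floor) division, matching the comment above.
#include <cstdint>

inline int64_t ConvOutDim(int64_t in, int64_t pad_begin, int64_t pad_end,
                          int64_t filter, int64_t dilation, int64_t stride) {
  return (in + pad_begin + pad_end - dilation * (filter - 1) - 1) / stride + 1;
}

// Example: 224x224 input, 3x3 filter, pad 1/1, dilation 1, stride 2:
//   ConvOutDim(224, 1, 1, 3, 1, 2) == 112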
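PadV3 / PadV3D introduce the paddings_contiguous attribute: true means paddings come as per-dimension [begin, end] pairs, false means all begins first and all ends last. Below is a hedged sketch of the conversion between the two layouts, using plain std::vector rather than the real tensor/attribute types consumed by the operator.

// Rearranges a "begins then ends" paddings list (paddings_contiguous = false)
// into per-dimension {begin, end} pairs (paddings_contiguous = true).
// Sketch only; PadV3 itself takes a paddings tensor, not std::vector.
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

std::vector<std::pair<int64_t, int64_t>> ToContiguousPairs(
    const std::vector<int64_t> &begins_then_ends) {
  const std::size_t dims = begins_then_ends.size() / 2;
  std::vector<std::pair<int64_t, int64_t>> pairs(dims);
  for (std::size_t d = 0; d < dims; ++d) {
    pairs[d] = {begins_then_ends[d],          // begin of dimension d
                begins_then_ends[dims + d]};  // end of dimension d
  }
  return pairs;
}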