@@ -68,11 +68,12 @@ elseif(DEFINED ENV{D_LINK_PATH})
find_library(slog libslog.so ${GE_LIB_PATH})
find_library(mmpa libmmpa.so ${GE_LIB_PATH})
find_library(runtime libruntime.so ${GE_LIB_PATH})
find_library(msprof libmsprof.so ${GE_LIB_PATH})
find_library(msprof libmsprofiler.a ${GE_LIB_PATH})
find_library(register libregister.so ${GE_LIB_PATH})
find_library(hccl libhccl.so ${GE_LIB_PATH})
find_library(resource libresource.so ${GE_LIB_PATH})
find_library(error_manager liberror_manager.so ${GE_LIB_PATH})
find_library(adump_server libadump_server.a ${GE_LIB_PATH})
else()
# Ascend mode
if(DEFINED ENV{ASCEND_CUSTOM_PATH})
@@ -84,13 +85,14 @@ else()
set(ASCEND_RUNTIME_DIR ${ASCEND_DIR}/fwkacllib/lib64)
find_library(slog libslog.so ${ASCEND_DRIVER_DIR})
find_library(mmpa libmmpa.so ${ASCEND_DRIVER_DIR})
find_library(msprof libmsprof.so ${ASCEND_DRIVER_DIR})
find_library(msprof libmsprofiler.a ${ASCEND_RUNTIME_DIR})
find_library(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
find_library(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
find_library(register libregister.so ${ASCEND_RUNTIME_DIR})
find_library(resource libresource.so ${ASCEND_RUNTIME_DIR})
find_library(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
find_library(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
endif()
# add compile flags
@@ -0,0 +1,102 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef INC_EXTERNAL_GE_GE_PROF_H_
#define INC_EXTERNAL_GE_GE_PROF_H_
#include <map>
#include <string>
#include <vector>
#include "ge/ge_api_error_codes.h"
namespace ge {
enum ProfDataTypeConfig {
kProfTaskTime = 0x0002,
kProfAiCoreMetrics = 0x0004,
kProfAicpuTrace = 0x0008,
kProfTrainingTrace = 0x0800,
kProfHcclTrace = 0x1000
};
enum ProfilingAicoreMetrics {
kAicoreArithmaticThroughput = 0,
kAicorePipeline = 1,
kAicoreSynchronization = 2,
kAicoreMemory = 3,
kAicoreInternalMemory = 4,
kAicoreStall = 5
};
typedef struct ProfAicoreEvents ProfAicoreEvents;
typedef struct aclgrphProfConfig aclgrphProfConfig;
///
/// @ingroup AscendCL
/// @brief Initialize the profiling and set profiling configuration path
/// @param [in] profiler_path: configuration path of profiling
/// @param [in] length: length of configuration path
/// @return Status result of function
///
Status aclgrphProfInit(const char *profiler_path, uint32_t length);
///
/// @ingroup AscendCL
/// @brief Finalize profiling
/// @return Status result of function
///
Status aclgrphProfFinalize();
///
/// @ingroup AscendCL
/// @brief Create data of type aclgrphProfConfig
/// @param [in] deviceid_list: device id list
/// @param [in] device_nums: device numbers
/// @param [in] aicore_metrics: type of aicore metrics
/// @param [in] aicore_events: pointer to aicore events; reserved, only NULL is supported now
/// @param [in] data_type_config: modules that need profiling
/// @return Status result of function
///
aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t device_nums,
ProfilingAicoreMetrics aicore_metrics, ProfAicoreEvents *aicore_events,
uint64_t data_type_config);
///
/// @ingroup AscendCL
/// @brief Destroy data of type aclgrphProfConfig
/// @param [in] profiler_config: config of profiling
/// @return Status result of function
///
Status aclgrphProfDestroyConfig(aclgrphProfConfig *profiler_config);
///
/// @ingroup AscendCL
/// @brief Start profiling of the modules configured by the profiler config
/// @param [in] profiler_config: config of profiling
/// @return Status result of function
///
Status aclgrphProfStart(aclgrphProfConfig *profiler_config);
///
/// @ingroup AscendCL
/// @brief Stop profiling of the modules configured by the profiler config
/// @param [in] profiler_config: config of profiling
/// @return Status result of function
///
Status aclgrphProfStop(aclgrphProfConfig *profiler_config);
} // namespace ge
#endif // INC_EXTERNAL_GE_GE_PROF_H_
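The header above only declares the profiling interface. A minimal end-to-end usage sketch follows; the configuration path, the single-device list, and the SUCCESS/FAILED status constants from ge_api_error_codes.h are assumptions for illustration, not part of this change.

```cpp
#include <cstring>
#include "ge/ge_prof.h"

ge::Status RunGraphProfiling() {
  const char *cfg_path = "/tmp/profiling";  // hypothetical configuration path
  ge::Status ret = ge::aclgrphProfInit(cfg_path, static_cast<uint32_t>(std::strlen(cfg_path)));
  if (ret != ge::SUCCESS) {
    return ret;
  }
  uint32_t devices[1] = {0};  // profile device 0 only
  uint64_t modules = ge::kProfTaskTime | ge::kProfTrainingTrace | ge::kProfHcclTrace;
  ge::aclgrphProfConfig *config =
      ge::aclgrphProfCreateConfig(devices, 1, ge::kAicorePipeline, nullptr, modules);
  if (config == nullptr) {
    (void)ge::aclgrphProfFinalize();
    return ge::FAILED;
  }
  (void)ge::aclgrphProfStart(config);
  // ... build and run graphs here ...
  (void)ge::aclgrphProfStop(config);
  (void)ge::aclgrphProfDestroyConfig(config);
  return ge::aclgrphProfFinalize();
}
```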
@@ -97,6 +97,7 @@ GE_ERRORNO_COMMON(INTERNAL_ERROR, 4, "Internal errors"); // 1343225
GE_ERRORNO_COMMON(CSEC_ERROR, 5, "Failed to call libc_sec API!"); // 1343225861
GE_ERRORNO_COMMON(TEE_ERROR, 6, "Failed to call tee API!"); // 1343225862
GE_ERRORNO_COMMON(END_OF_SEQUENCE, 7, "End of sequence!"); // 1343225863
GE_ERRORNO_COMMON(PATH_INVALID, 8, "Path is invalid!"); // 1343225864
// Error code for plugin manager
GE_ERRORNO_COMMON(GE_PLGMGR_PATH_INVALID, 30, "Path is invalid!"); // 1343225886
@@ -124,9 +125,13 @@ GE_ERRORNO_CLIENT(GE_CLI_GE_ALREADY_INITIALIZED, 10, "GE is already initialized.
GE_ERRORNO_CLIENT(GE_CLI_GE_NOT_INITIALIZED, 11, "GE is not yet initialized or is finalized."); // 1343229963
// Init module error code definition
GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048
GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049
GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050
GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048
GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049
GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050
GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported."); // 1343234051
GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profiling initialization has not been done."); // 1343234052
GE_ERRORNO_INIT(GE_PROF_MODE_CONFLICT, 5,
"Profiling command mode which is preferred is running, the api mode will not work."); // 1343234053
// Session module error code definition
GE_ERRORNO_SESSION(GE_SESS_INIT_FAILED, 0, "Failed to initialize session."); // 1343238144
@@ -48,6 +48,8 @@ enum OpEngineType {
ENGINE_AIVECTOR = 4 // not support
};
enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYNAMIC_AIPP, DYNAMIC_AIPP_NODE };
const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM";
const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement";
@@ -61,8 +61,10 @@ class StringUtils {
/// @param [in] delim separator
/// @return string array after segmentation
///
/*lint -e1077*/
static std::vector<std::string> Split(const std::string &str, char delim) {
std::vector<std::string> elems;
/*lint +e1077*/
if (str.empty()) {
elems.emplace_back("");
@@ -398,6 +398,24 @@ bool CheckOutputPathValid(const std::string &file_path, const std::string &atc_p
/// @param [out] result
///
bool ValidateStr(const std::string &filePath, const std::string &mode);
///
/// @ingroup domi_common
/// @brief Check whether the file is a regular file.
/// @param [in] file_path file path
/// @param [out] result
///
bool IsValidFile(const char *file_path);
///
/// @ingroup domi_common
/// @brief Check whether the path is valid
/// @param [in] path, path to be checked
/// @param [in] length, length of path
/// @return 0 success
/// @return -1 fail
///
Status CheckPath(const char *path, size_t length);
} // namespace ge
#endif // INC_FRAMEWORK_COMMON_UTIL_H_
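A caller-side sketch of the two new helpers; the function name, include path, and model path below are hypothetical, PATH_INVALID is the common error code added above, and 0 is the documented success value of CheckPath.

```cpp
#include <string>
#include "framework/common/util.h"

ge::Status OpenModelFileChecked(const std::string &model_path) {
  if (!ge::IsValidFile(model_path.c_str())) {
    return ge::PATH_INVALID;  // not a regular, readable file
  }
  if (ge::CheckPath(model_path.c_str(), model_path.size()) != 0) {
    return ge::PATH_INVALID;  // path string itself did not pass the check
  }
  return ge::SUCCESS;
}
```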
@@ -163,6 +163,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
ge::Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info);
ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index);
ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
std::vector<ge::TensorDesc> &output_desc);
@@ -92,6 +92,9 @@ struct OmgContext {
std::map<std::string, std::vector<int32_t>> out_nodes_map;
// user-designate out nodes (this is used for determing the orders)
std::vector<std::pair<std::string, int32_t>> user_out_nodes;
// save the output node of the network, value = topName,
// topName indicates the output name of the operator.
std::vector<std::string> user_out_nodes_top_vec;
// net out nodes (where user_out_nodes or leaf nodes)
std::vector<std::string> net_out_nodes;
// net out nodes top names(only caffe has top)
@@ -141,6 +141,11 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_OUTPUTS;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_DIMS;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_AIPP_INPUT_DIMS;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DATA_RELATED_AIPP_MODE;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DATA_AIPP_DATA_NAME_MAP;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_GRAPH_HAS_BEEN_ADDED;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME;
@@ -1047,6 +1052,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_FLAG;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_MODE;
// op dynamic input
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_INPUT_START;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_INPUT_END;
// functional ops attr
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_THEN_BRANCH;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_ELSE_BRANCH;
@@ -235,7 +235,8 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder {
vector<string> GetOpInferDepends() const;
string GetInputNameByIndex(uint32_t index) const;
string GetValidInputNameByIndex(uint32_t index) const;
int GetValidInputIndexByName(const string &name) const;
int GetInputIndexByName(const string &name) const;
string GetOutputNameByIndex(uint32_t index) const;
@@ -22,8 +22,10 @@
template <class E, class O>
class RangeVistor {
public:
/*lint -e151*/
using Iterator = typename std::vector<E>::iterator;
using ConstIterator = typename std::vector<E>::const_iterator;
/*lint +e151*/
RangeVistor(O owner, const std::vector<E> &vs) : owner_(owner), elements_(vs) {}
@@ -41,7 +43,9 @@ class RangeVistor {
bool empty() const { return elements_.empty(); }
/*lint -e659*/
E &at(std::size_t index) { return elements_.at(index); }
/*lint +e659*/
const E &at(std::size_t index) const { return elements_.at(index); }
@@ -23,6 +23,7 @@
#include <vector>
#include "external/graph/ge_error_codes.h"
#include "external/graph/tensor.h"
#include "ge_attr_value.h"
namespace ge {
class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY RuntimeInferenceContext {
@@ -32,10 +33,12 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY RuntimeInferenceContext {
static void DestroyContext(const std::string &context_id);
graphStatus SetTensor(int64_t node_id, int output_id, Tensor &&tensor);
graphStatus GetTensor(int64_t node_id, int output_id, GeTensorPtr &tensor);
graphStatus GetTensor(int64_t node_id, int output_id, Tensor &tensor);
private:
std::map<int64_t, std::vector<Tensor>> tensors_;
std::map<int64_t, std::vector<GeTensorPtr>> ge_tensors_;
std::mutex mu_;
static std::map<std::string, std::unique_ptr<RuntimeInferenceContext>> contexts_;
@@ -53,6 +53,7 @@ class OpDescUtils {
static vector<GeTensorPtr> MutableWeights(const ge::NodePtr node);
static graphStatus SetWeights(ge::Node& node, const vector<ge::GeTensorPtr>& weights);
static graphStatus SetWeights(ge::NodePtr node, const vector<ge::GeTensorPtr>& weights);
static graphStatus SetWeights(ge::Node& node, const map<int, ge::GeTensorPtr>& weights_map);
static graphStatus ClearWeights(ge::NodePtr node);
static bool ClearInputDesc(ge::OpDescPtr op_desc, uint32_t index);
@@ -24,7 +24,7 @@ file(GLOB_RECURSE PROTO_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"../../proto/task.proto"
"../../proto/fwk_adaper.proto"
"../../proto/op_mapping_info.proto"
"../proto/dump_task.proto"
"../../proto/dump_task.proto"
)
file(GLOB_RECURSE ONNX_PROTO_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
@@ -658,7 +658,7 @@ ComputeGraph::UpdateOutputMapping(const std::map<uint32_t, uint32_t> &output_map
return GRAPH_FAILED;
}
size_t num = op_desc->GetInputsSize();
size_t num = op_desc->GetAllInputsSize();
for (size_t i = 0; i < num; i++) {
GeTensorDesc tensor = op_desc->GetInputDesc(i);
uint32_t cur_index = 0;
@@ -28,7 +28,7 @@ using std::unordered_set;
void AttrHolder::CopyAttrsFrom(const AttrHolder &holder) { MutableAttrMap().CopyValueFrom(holder.GetAttrMap()); }
graphStatus AttrHolder::SetAttr(const std::string &name, const GeAttrValue &value) {
if (value.IsEmpty()) {
GELOGE(GRAPH_FAILED, "value is empty, key %s", name.c_str());
GELOGE(GRAPH_FAILED, "value is empty, key of the attr is %s", name.c_str());
return GRAPH_FAILED;
}
auto proto_map = MutableAttrMap().GetProtoMsg();
@@ -149,9 +149,10 @@ graphStatus FormatRefiner::GetAnchorPoints(const ge::ComputeGraphPtr &graph, std
// consider special node save process
// get all input desc format
bool node_is_all_nd = false;
auto input_size = static_cast<uint32_t>(op_desc->GetInputsSize());
auto input_size = static_cast<uint32_t>(op_desc->GetAllInputsSize());
for (uint32_t i = 0; i < input_size; i++) {
// Operator pre-set format but not origin format
GE_IF_BOOL_EXEC(op_desc->MutableInputDesc(i) == nullptr, continue);
auto input_format = op_desc->MutableInputDesc(i)->GetFormat();
// Pre-save data node (only main graph data) and default infer fail
if (node_ptr->GetType() == DATA) {
@@ -164,6 +165,7 @@ graphStatus FormatRefiner::GetAnchorPoints(const ge::ComputeGraphPtr &graph, std
// Get all output desc format
auto output_size = static_cast<uint32_t>(op_desc->GetOutputsSize());
for (uint32_t i = 0; i < output_size; i++) {
GE_IF_BOOL_EXEC(op_desc->MutableOutputDesc(i) == nullptr, continue);
auto output_format = op_desc->MutableOutputDesc(i)->GetFormat();
if (output_format != FORMAT_ND && output_format != FORMAT_RESERVED) {
node_is_all_nd = true;
@@ -222,8 +224,9 @@ graphStatus FormatRefiner::BackInferProcess(std::deque<ge::NodePtr> &nodes, ge::
for (const auto &in_anchor : node->GetAllInDataAnchors()) {
GELOGD("Node is [%s] [B]", (node->GetName()).c_str());
auto in_data_anchor_idx = in_anchor->GetIdx();
auto to_be_set_format =
node->GetOpDesc()->MutableInputDesc(static_cast<uint32_t>(in_data_anchor_idx))->GetOriginFormat();
auto input_desc = node->GetOpDesc()->MutableInputDesc(static_cast<uint32_t>(in_data_anchor_idx));
GE_IF_BOOL_EXEC(input_desc == nullptr, continue);
auto to_be_set_format = input_desc->GetOriginFormat();
if (to_be_set_format == FORMAT_ND) {
GELOGD("Node [%s] [B], format is ND", (node->GetName()).c_str());
continue;
@@ -122,6 +122,11 @@ const std::string ATTR_NAME_AIPP_INPUTS = "_aipp_inputs";
const std::string ATTR_NAME_AIPP_OUTPUTS = "_aipp_outputs";
const std::string ATTR_NAME_INPUT_DIMS = "input_dims";
const std::string ATTR_DYNAMIC_AIPP_INPUT_DIMS = "_dynamic_aipp_input_dims";
const std::string ATTR_DATA_RELATED_AIPP_MODE = "_data_related_aipp_mode";
const std::string ATTR_DATA_AIPP_DATA_NAME_MAP = "_data_aipp_data_name_map";
const std::string ATTR_NAME_GRAPH_HAS_BEEN_ADDED = "_graph_has_been_added";
const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id";
const std::string ATTR_NAME_PARENT_GRAPH_NAME = "_parent_graph_name";
@@ -1055,6 +1060,10 @@ const std::string ATTR_NAME_HCCL_FUSED_FLAG = "_hccl_fused_node";
const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR = "_alloc_fixed_addr";
const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX = "_alloc_fixed_addr_index";
// op dynamic input
const std::string ATTR_NAME_DYNAMIC_INPUT_START = "_dynamic_input_index_start";
const std::string ATTR_NAME_DYNAMIC_INPUT_END = "_dynamic_input_index_end";
// atc user def dtype&format
const std::string ATTR_ATC_USER_DEFINE_DATATYPE = "_user_defined_data_type";
const std::string ATTR_ATC_USER_DEFINE_FORMAT = "_user_defined_format";
@@ -431,7 +431,7 @@ graphStatus GeTensorDesc::GetShapeRange(std::vector<std::pair<int64_t, int64_t>>
return GRAPH_FAILED;
}
std::pair<int64_t, int64_t> pair({ele[0], ele[1]});
range.push_back(pair);
range.emplace_back(pair);
}
return GRAPH_SUCCESS;
@@ -33,7 +33,6 @@ COMMON_LOCAL_SRC_FILES := \
./utils/tuning_utils.cc \
./utils/graph_utils.cc \
./utils/ge_ir_utils.cc \
./utils/node_utils.cc \
./utils/op_desc_utils.cc \
./utils/type_utils.cc \
./utils/tensor_utils.cc \
@@ -44,6 +43,7 @@ COMMON_LOCAL_SRC_FILES := \
option/ge_context.cc \
option/ge_local_context.cc \
./runtime_inference_context.cc \
./utils/node_utils.cc \
COMMON_LOCAL_C_INCLUDES := \
proto/om.proto \
@@ -68,7 +68,7 @@ graphStatus Node::Init() {
return GRAPH_SUCCESS;
}
GE_CHK_BOOL_EXEC(op_ != nullptr, return GRAPH_FAILED, "original OpDesc is nullptr");
size_t size = op_->GetInputsSize();
size_t size = op_->GetAllInputsSize();
for (size_t i = 0; i < size; i++) {
std::shared_ptr<InDataAnchor> anchor = ComGraphMakeShared<InDataAnchor>(shared_from_this(), i);
if (anchor == nullptr) {
@@ -305,13 +305,19 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus Node::AddLinkFrom(con
GELOGE(GRAPH_FAILED, "add input desc failed.");
return GRAPH_FAILED;
}
std::shared_ptr<InDataAnchor> anchor = ComGraphMakeShared<InDataAnchor>(shared_from_this(), in_data_anchors_.size());
if (anchor == nullptr) {
GELOGE(GRAPH_FAILED, "out_anchor size is:%zu, malloc shared_ptr failed.", out_anchors.size());
return GRAPH_FAILED;
if (index < GetAllInDataAnchors().size()) {
(void)out_anchors.at(0)->LinkTo(in_data_anchors_[index]);
} else {
std::shared_ptr<InDataAnchor> anchor =
ComGraphMakeShared<InDataAnchor>(shared_from_this(), in_data_anchors_.size());
if (anchor == nullptr) {
GELOGE(GRAPH_FAILED, "out_anchor size is:%zu, malloc shared_ptr failed.", out_anchors.size());
return GRAPH_FAILED;
}
in_data_anchors_.push_back(anchor);
(void)out_anchors.at(0)->LinkTo(in_data_anchors_.back());
}
in_data_anchors_.push_back(anchor);
(void)out_anchors.at(0)->LinkTo(in_data_anchors_.back());
return GRAPH_SUCCESS;
}
@@ -347,20 +353,30 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus Node::AddLinkFrom(con
}
GE_CHECK_NOTNULL(op_);
auto op_desc = input_node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
if (op_->AddInputDesc(name, op_desc->GetOutputDesc(0)) != GRAPH_SUCCESS) {
GELOGE(GRAPH_FAILED, "add input desc failed.");
return GRAPH_FAILED;
auto input_op_desc = input_node->GetOpDesc();
GE_CHECK_NOTNULL(input_op_desc);
auto index = op_->GetInputIndexByName(name);
if (index != -1) {
if (index >= static_cast<int>(in_data_anchors_.size())) {
GELOGE(GRAPH_FAILED, "op %s get input name %s 's index %d is illegal.", op_->GetName().c_str(), name.c_str(),
index);
return GRAPH_FAILED;
}
(void)out_anchors.at(0)->LinkTo(in_data_anchors_[index]);
} else {
std::shared_ptr<InDataAnchor> anchor =
ComGraphMakeShared<InDataAnchor>(shared_from_this(), in_data_anchors_.size());
if (anchor == nullptr) {
GELOGE(GRAPH_FAILED, "in_data_anchors_size is:%zu, malloc shared_ptr failed.", in_data_anchors_.size());
return GRAPH_FAILED;
}
in_data_anchors_.push_back(anchor);
(void)out_anchors.at(0)->LinkTo(in_data_anchors_.back());
}
std::shared_ptr<InDataAnchor> anchor = ComGraphMakeShared<InDataAnchor>(shared_from_this(), in_data_anchors_.size());
if (anchor == nullptr) {
GELOGE(GRAPH_FAILED, "out_anchor size is:%zu, malloc shared_ptr failed.", out_anchors.size());
if (op_->AddInputDesc(name, input_op_desc->GetOutputDesc(0)) != GRAPH_SUCCESS) {
GELOGE(GRAPH_FAILED, "add input desc failed.");
return GRAPH_FAILED;
}
in_data_anchors_.push_back(anchor);
(void)out_anchors.at(0)->LinkTo(in_data_anchors_.back());
return GRAPH_SUCCESS;
}
@@ -746,9 +762,10 @@ graphStatus Node::Verify() const {
if (!is_unknown_graph) {
for (const auto &in_anchor_ptr : GetAllInDataAnchors()) {
GE_IF_BOOL_EXEC(in_anchor_ptr == nullptr, GELOGW("in anchor ptr is null"); continue);
bool valid_anchor = op_->GetType() == data_type || op_->GetType() == aipp_data_type ||
op_->GetType() == const_type || op_->GetType() == variable_type ||
op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || in_anchor_ptr->GetPeerAnchors().size() > 0;
bool valid_anchor =
op_->GetType() == data_type || op_->GetType() == aipp_data_type || op_->GetType() == const_type ||
op_->GetType() == variable_type || op_->IsOptionalInput(in_anchor_ptr->GetIdx()) ||
op_->MutableInputDesc(in_anchor_ptr->GetIdx()) == nullptr || in_anchor_ptr->GetPeerAnchors().size() > 0;
if (!valid_anchor) {
ErrorManager::GetInstance().ATCReportErrMessage("E11019", {"opname", "index"},
{GetName(), std::to_string(in_anchor_ptr->GetIdx())});
@@ -347,7 +347,10 @@ graphStatus OpDesc::AddOptionalInputDesc(const string &name, const ge::GeTensorD
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus
OpDesc::UpdateInputDesc(uint32_t index, const ge::GeTensorDesc &tensor_Desc) {
GE_CHK_BOOL_RET_STATUS((index < inputs_desc_.size()), GRAPH_FAILED, "The index is invalid. index[%u]", index);
if (index >= inputs_desc_.size()) {
GELOGW("The index is invalid. index[%u]", index);
return GRAPH_FAILED;
}
inputs_desc_[index] = ComGraphMakeShared<GeTensorDesc>(tensor_Desc);
if (inputs_desc_[index] == nullptr) {
@@ -675,7 +678,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ConstGeTensorDescPtr OpDesc::GetI
return nullptr;
}
if (inputs_desc_[index]->IsValid() != GRAPH_SUCCESS) {
GELOGE(GRAPH_FAILED, "inputsDesc[%u] is InValid", index);
GELOGW("inputsDesc[%u] is InValid", index);
return nullptr;
} else {
return inputs_desc_[static_cast<size_t>(index)];
@@ -949,6 +952,43 @@ int OpDesc::GetInputIndexByName(const string &name) const {
return static_cast<int>(it_find->second);
}
int OpDesc::GetValidInputIndexByName(const string &name) const {
map<string, uint32_t> valid_input_name_idx{};
uint32_t j = 0;
for (size_t i = 0; i < GetAllInputsSize(); i++) {
if (MutableInputDesc(static_cast<uint32_t>(i)) != nullptr) {
auto valid_name = GetInputNameByIndex(static_cast<uint32_t>(i));
GE_CHK_BOOL_RET_STATUS_NOLOG(!valid_name.empty(), -1);
valid_input_name_idx.insert({valid_name, j});
j++;
}
}
auto it_find = valid_input_name_idx.find(name);
GE_CHK_BOOL_RET_STATUS_NOLOG(it_find != valid_input_name_idx.end(), -1);
return static_cast<int>(it_find->second);
}
string OpDesc::GetValidInputNameByIndex(uint32_t index) const {
map<string, uint32_t> valid_input_name_idx{};
uint32_t j = 0;
for (size_t i = 0; i < GetAllInputsSize(); i++) {
if (MutableInputDesc(static_cast<uint32_t>(i)) != nullptr) {
auto valid_name = GetInputNameByIndex(static_cast<uint32_t>(i));
GE_CHK_BOOL_RET_STATUS_NOLOG(!valid_name.empty(), "");
valid_input_name_idx.insert({valid_name, j});
j++;
}
}
auto it = valid_input_name_idx.begin();
for (; it != valid_input_name_idx.end(); ++it) {
if (it->second == index) {
break;
}
}
GE_CHK_BOOL_RET_STATUS_NOLOG(it != valid_input_name_idx.end(), "");
return it->first;
}
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY string OpDesc::GetOutputNameByIndex(uint32_t index) const {
auto it = output_name_idx_.begin();
for (; it != output_name_idx_.end(); ++it) {
@@ -1504,7 +1504,9 @@ class GraphBuilderImpl {
GE_CHK_BOOL_EXEC(dst_anchor != nullptr, return GRAPH_FAILED, "GetInDataAnchor failed.");
auto ret = GraphUtils::AddEdge(src_anchor, dst_anchor);
GE_CHK_BOOL_EXEC(ret == GRAPH_SUCCESS, return GRAPH_FAILED, "AddEdge failed.");
GE_CHK_BOOL_EXEC(ret == GRAPH_SUCCESS, return GRAPH_FAILED,
"from node[%s][%d] to node[%s][%d]AddEdge failed.", src_node_ptr->GetName().c_str(),
src_anchor->GetIdx(), dst_node_info->second->GetName().c_str(), dst_anchor->GetIdx());
}
}
auto out_control_anchor = src_node_ptr->GetOutControlAnchor();
@@ -1536,19 +1538,23 @@ inline bool HasSameNameNode(const ComputeGraphPtr &compute_graph) {
for (const auto &graph : compute_graph->GetAllSubgraphs()) {
std::set<string> node_names;
for (auto const &node : graph->GetDirectNode()) {
node_names.insert(node->GetName());
}
if (node_names.size() != graph->GetDirectNodesSize()) {
return true;
auto result = node_names.insert(node->GetName());
if (!result.second) {
GELOGE(GRAPH_FAILED, "graph %s has same name node%s", graph->GetName().c_str(), node->GetName().c_str());
return true;
}
}
}
std::set<string> node_names;
for (auto const &node : compute_graph->GetDirectNode()) {
node_names.insert(node->GetName());
auto result = node_names.insert(node->GetName());
if (!result.second) {
GELOGE(GRAPH_FAILED, "graph %s has same name node%s", compute_graph->GetName().c_str(), node->GetName().c_str());
return true;
}
}
return node_names.size() != compute_graph->GetDirectNodesSize();
return false;
}
ComputeGraphPtr GraphUtils::CreateGraphFromOperator(const string &name, const vector<ge::Operator> &inputs) {
@@ -56,7 +56,7 @@ class RefRelations::Impl {
}
return GRAPH_SUCCESS;
}
GELOGW("can not find any relations! key value is %s", lookup_key.c_str());
GELOGW("can not find any relations! key value of dest relation is %s", lookup_key.c_str());
return GRAPH_SUCCESS;
};
graphStatus BuildRefRelations(ge::ComputeGraph &root_graph);
@@ -15,6 +15,7 @@
*/
#include "graph/runtime_inference_context.h"
#include "graph/utils/tensor_adapter.h"
#include <cstdint>
#include "framework/common/debug/ge_log.h"
@@ -67,6 +68,14 @@ graphStatus RuntimeInferenceContext::SetTensor(int64_t node_id, int output_id, T
GELOGD("Set tensor for node_id = %ld, output_id = %d", node_id, output_id); | GELOGD("Set tensor for node_id = %ld, output_id = %d", node_id, output_id); | ||||
output_tensors[output_id] = std::move(tensor); | output_tensors[output_id] = std::move(tensor); | ||||
auto &output_ge_tensors = ge_tensors_[node_id]; | |||||
if (static_cast<uint32_t>(output_id) >= output_ge_tensors.size()) { | |||||
output_ge_tensors.resize(output_id + 1); | |||||
} | |||||
GELOGD("Set ge tensor for node_id = %ld, output_id = %d", node_id, output_id); | |||||
output_ge_tensors[output_id] = TensorAdapter::AsGeTensorPtr(tensor); | |||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
} | } | ||||
@@ -93,4 +102,28 @@ graphStatus RuntimeInferenceContext::GetTensor(int64_t node_id, int output_id, T | |||||
tensor = output_tensors[output_id]; | tensor = output_tensors[output_id]; | ||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
} | } | ||||
graphStatus RuntimeInferenceContext::GetTensor(int64_t node_id, int output_id, GeTensorPtr &tensor) { | |||||
if (output_id < 0) { | |||||
GELOGE(GRAPH_PARAM_INVALID, "Invalid output index: %d", output_id); | |||||
return GRAPH_PARAM_INVALID; | |||||
} | |||||
std::lock_guard<std::mutex> lk(mu_); | |||||
auto iter = ge_tensors_.find(node_id); | |||||
if (iter == ge_tensors_.end()) { | |||||
GELOGE(INTERNAL_ERROR, "Node not register. Id = %ld", node_id); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
auto &output_tensors = iter->second; | |||||
if (static_cast<uint32_t>(output_id) >= output_tensors.size()) { | |||||
GELOGE(GRAPH_FAILED, "Node output is not registered. node_id = %ld, output index = %d", node_id, output_id); | |||||
return GRAPH_FAILED; | |||||
} | |||||
GELOGD("Get ge tensor for node_id = %ld, output_id = %d", node_id, output_id); | |||||
tensor = output_tensors[output_id]; | |||||
return GRAPH_SUCCESS; | |||||
} | |||||
} // namespace ge | } // namespace ge |
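The new overload returns the stored value as a GeTensorPtr, so callers that work on GeTensor (such as NodeUtils::GetInputConstData further down) avoid a Tensor round trip. A small caller-side sketch, with an assumed context id and node/output ids:

```cpp
#include <string>
#include "graph/runtime_inference_context.h"

ge::GeTensorPtr FetchOutput(const std::string &context_id, int64_t node_id, int output_id) {
  ge::RuntimeInferenceContext *ctx = nullptr;
  ge::GeTensorPtr tensor = nullptr;
  if (ge::RuntimeInferenceContext::GetContext(context_id, &ctx) != ge::GRAPH_SUCCESS) {
    return nullptr;  // no context was created for this id
  }
  if (ctx->GetTensor(node_id, output_id, tensor) != ge::GRAPH_SUCCESS) {
    return nullptr;  // node/output not registered via SetTensor yet
  }
  return tensor;
}
```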
@@ -51,6 +51,9 @@ graphStatus ReverseBrushWhileBodySubGraph(const ConstNodePtr &node) {
for (const auto &node_sub : sub_graph_body->GetAllNodes()) {
for (size_t i = 0; i < node_sub->GetAllInDataAnchorsSize(); i++) {
auto input_desc = node_sub->GetOpDesc()->MutableInputDesc(i);
GE_IF_BOOL_EXEC(input_desc == nullptr,
GELOGW("Get null input by index %zu from node %s ", i, node_sub->GetName().c_str());
continue);
(void)input_desc->SetUnknownDimNumShape();
}
for (size_t i = 0; i < node_sub->GetAllOutDataAnchorsSize(); i++) {
@@ -376,10 +379,13 @@ graphStatus UpdateOpInputDesc(const ConstNodePtr &node_ptr) {
continue;
}
int peer_out_idx = peer_out_data_anchor->GetIdx();
auto in_desc = node_ptr->GetOpDesc()->MutableInputDesc(static_cast<uint32_t>(in_idx));
auto peer_out_desc = peer_out_data_node->GetOpDesc()->MutableOutputDesc(static_cast<uint32_t>(peer_out_idx));
// check shape and dtype continuity. do not stop process
auto in_desc = node_ptr->GetOpDesc()->MutableInputDesc(static_cast<uint32_t>(in_idx));
if (in_desc == nullptr) {
continue;
}
auto in_shape = in_desc->GetShape().GetDims();
auto in_dtype = in_desc->GetDataType();
auto peer_out_shape = peer_out_desc->GetShape().GetDims();
@@ -264,11 +264,11 @@ void OnnxUtils::AddAttrProtoForOpInAndOutDesc(onnx::NodeProto *node_proto, const
return;
}
// Input describes
auto size_in = op_desc->GetInputsSize();
auto size_in = op_desc->GetAllInputsSize();
AddAttrProto(node_proto, onnx::AttributeProto_AttributeType_INT, "input_desc_nums", &size_in);
if (size_in > 0) {
for (uint32_t i = 0; i < size_in; i++) {
auto input_desc = op_desc->GetInputDescPtr(i);
auto input_desc = op_desc->GetInputDescPtrDfault(i);
if (input_desc != nullptr) {
auto data_type = TypeUtils::DataTypeToSerialString(input_desc->GetDataType());
AddAttrProto(node_proto, onnx::AttributeProto_AttributeType_STRING, "input_desc_dtype:" + std::to_string(i),
@@ -480,9 +480,20 @@ void OnnxUtils::AddAttrProtoFromNodeMembers(const NodePtr &node, onnx::NodeProto
if (!recv_list.empty()) {
AddAttrProto(node_proto, onnx::AttributeProto_AttributeType_INTS, "recv_event_id_list", &recv_list);
}
// 2.Attributes added from node's op_(message OpDef)
auto op_desc = node->op_;
if (op_desc != nullptr) {
// for input_name_idx_ in opdesc
auto input_name_2_indexs = op_desc->GetAllInputName();
::google::protobuf::RepeatedPtrField<::std::string> input_names;
::google::protobuf::RepeatedField<::google::protobuf::int64> input_indexes;
for (const auto &input_name_2_index : input_name_2_indexs) {
std::string input_name = input_name_2_index.first;
input_names.Add(std::move(input_name));
input_indexes.Add(input_name_2_index.second);
}
AddAttrProto(node_proto, onnx::AttributeProto_AttributeType_STRINGS, "_input_name_key", input_names);
AddAttrProto(node_proto, onnx::AttributeProto_AttributeType_INTS, "_input_name_value", input_indexes);
// 2.Attributes added from node's op_(message OpDef)
// Input and out describes
AddAttrProtoForOpInAndOutDesc(node_proto, op_desc);
// Others
@@ -1470,8 +1470,7 @@ graphStatus GraphUtils::CopyTensorAttrs(const OpDescPtr &dst_desc, const NodePtr
for (uint32_t i = 0; i < src_node->GetAllInDataAnchorsSize(); ++i) {
auto input_desc = dst_desc->MutableInputDesc(i);
if (input_desc == nullptr) {
GELOGE(GRAPH_FAILED, "Param dst node not valid");
return GRAPH_FAILED;
continue;
}
input_desc->CopyAttrsFrom(src_desc->GetInputDesc(i));
}
@@ -14,8 +14,8 @@
* limitations under the License.
*/
#include "utils/node_utils.h"
#include "utils/op_desc_utils.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/graph_utils.h"
#include "debug/ge_op_types.h"
#include "debug/ge_util.h"
@@ -23,8 +23,13 @@
#include "graph/anchor.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/types.h"
#include "utils/tensor_utils.h"
#include "utils/type_utils.h"
#include "external/graph/operator.h"
#include "graph/ge_context.h"
#include "graph/runtime_inference_context.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/tensor_adapter.h"
#include "graph/utils/type_utils.h"
namespace ge {
std::map<NodePtr, std::vector<uint32_t>> NodeUtils::map_send_info_{};
@@ -575,6 +580,58 @@ graphStatus NodeUtils::GetNodeUnknownShapeStatus(const Node &node, bool &is_unkn
return GRAPH_SUCCESS;
}
graphStatus NodeUtils::GetInputConstData(const ConstNodePtr &node_ptr, const string &dst_name, GeTensorPtr &ge_tensor) {
GE_CHECK_NOTNULL(node_ptr);
return NodeUtils::GetInputConstData(*node_ptr, dst_name, ge_tensor);
}
graphStatus NodeUtils::GetInputConstData(const Node &node, const string &dst_name, GeTensorPtr &ge_tensor) {
// For inner compute graph
auto op_desc = node.GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
auto index = op_desc->GetInputIndexByName(dst_name);
auto in_data_anchor = node.GetInDataAnchor(index);
GE_CHECK_NOTNULL(in_data_anchor);
auto out_data_anchor = in_data_anchor->GetPeerOutAnchor();
GE_CHECK_NOTNULL(out_data_anchor);
auto peer_node = out_data_anchor->GetOwnerNode();
GE_CHECK_NOTNULL(peer_node);
auto peer_op_desc = peer_node->GetOpDesc();
GE_CHECK_NOTNULL(peer_op_desc);
auto peer_op_type = peer_op_desc->GetType();
if (peer_op_type == CONSTANTOP || peer_op_type == CONSTANT) {
if (!AttrUtils::MutableTensor(peer_node->GetOpDesc(), ATTR_NAME_WEIGHTS, ge_tensor)) {
GELOGW("get attr name %s failed.", ATTR_NAME_WEIGHTS.c_str());
return GRAPH_FAILED;
}
return GRAPH_SUCCESS;
} else if (peer_op_type == DATA) {
auto parent_node = NodeUtils::GetParentInput(peer_node);
while ((parent_node != nullptr) && (parent_node->GetType() == DATA)) {
parent_node = NodeUtils::GetParentInput(parent_node);
}
if ((parent_node != nullptr) && ((parent_node->GetType() == CONSTANT) || (parent_node->GetType() == CONSTANTOP))) {
if (!AttrUtils::MutableTensor(parent_node->GetOpDesc(), ATTR_NAME_WEIGHTS, ge_tensor)) {
GELOGW("get attr name %s failed.", ATTR_NAME_WEIGHTS.c_str());
return GRAPH_FAILED;
}
return GRAPH_SUCCESS;
}
}
// Try get from runtime inference context
auto session_id = std::to_string(GetContext().SessionId());
RuntimeInferenceContext *runtime_infer_ctx = nullptr;
if (RuntimeInferenceContext::GetContext(session_id, &runtime_infer_ctx) == GRAPH_SUCCESS) {
GELOGD("To get constant from runtime inference context. session_id = %s", session_id.c_str());
auto ret = runtime_infer_ctx->GetTensor(peer_node->GetOpDesc()->GetId(), out_data_anchor->GetIdx(), ge_tensor);
if (ret == GRAPH_SUCCESS) {
return GRAPH_SUCCESS;
}
}
GELOGW("node[%s]'s input[%s]'s peer node is not const", node.GetName().c_str(), dst_name.c_str());
return GRAPH_FAILED;
}
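A sketch of how a constant-folding or shape-inference pass might use the helper implemented above; the input name "axis" and the wrapper function are only illustrative.

```cpp
#include "graph/utils/node_utils.h"

// Returns true when the node's "axis" input is available as a compile-time constant,
// either from a Const/Constant peer or from the runtime inference context.
bool TryGetAxisConst(const ge::Node &node, ge::GeTensorPtr &axis_tensor) {
  return ge::NodeUtils::GetInputConstData(node, "axis", axis_tensor) == ge::GRAPH_SUCCESS &&
         axis_tensor != nullptr;
}
```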
std::string NodeUtils::GetNodeType(const Node &node) {
if (node.GetType() != FRAMEWORKOP) {
return node.GetType();
@@ -587,14 +644,6 @@ std::string NodeUtils::GetNodeType(const Node &node) {
std::string NodeUtils::GetNodeType(const NodePtr &node) { return node == nullptr ? "" : GetNodeType(*node); }
graphStatus NodeUtils::GetInputConstData(const ConstNodePtr &node_ptr, const string &dst_name, GeTensorPtr &ge_tensor) {
return GRAPH_SUCCESS;
}
graphStatus NodeUtils::GetInputConstData(const Node &node, const string &dst_name, GeTensorPtr &ge_tensor) {
return GRAPH_SUCCESS;
}
ComputeGraphPtr NodeUtils::GetSubgraph(const Node &node, uint32_t index) {
auto op_desc = node.GetOpDesc();
if (op_desc == nullptr) {
@@ -513,7 +513,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector<GeTensorPtr> OpDescUtils::
}
return MutableWeights(*node);
}
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus
OpDescUtils::SetWeights(ge::Node &node, const vector<ge::GeTensorPtr> &weights) {
GE_CHK_BOOL_EXEC(node.GetOpDesc() != nullptr, return GRAPH_PARAM_INVALID, "node.GetOpDesc is nullptr!");
@@ -561,6 +560,53 @@ OpDescUtils::SetWeights(ge::Node &node, const vector<ge::GeTensorPtr> &weights)
return GRAPH_SUCCESS;
}
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus
OpDescUtils::SetWeights(ge::Node &node, const map<int, ge::GeTensorPtr> &weights_map) {
GE_CHECK_NOTNULL(node.GetOpDesc());
// 1. node is const
if (node.GetOpDesc()->GetType() == CONSTANT) {
if (weights_map.size() == CONST_OP_NORMAL_WEIGHT_SIZE) {
return SetWeights(node.GetOpDesc(), weights_map.begin()->second);
}
GELOGE(GRAPH_PARAM_INVALID, "const op %s weight size %zu should be 1", node.GetName().c_str(), weights_map.size());
return GRAPH_PARAM_INVALID;
}
// 2. node is not const
for (const auto &pair : weights_map) {
auto in_data_anchor = node.GetInDataAnchor(pair.first);
if (in_data_anchor != nullptr && in_data_anchor->GetPeerOutAnchor() != nullptr) {
// a. update const input node
auto out_anchor = in_data_anchor->GetPeerOutAnchor();
auto peer_node = out_anchor->GetOwnerNode();
if (peer_node == nullptr) {
GELOGE(GRAPH_PARAM_INVALID, "op %s [%d]'s input node is null", node.GetName().c_str(), pair.first);
return GRAPH_PARAM_INVALID;
}
if (peer_node->GetType() != CONSTANT) {
GELOGE(GRAPH_PARAM_INVALID, " op %s [%d]'s input node should be const, but is %s type:%s ",
node.GetName().c_str(), pair.first, peer_node->GetName().c_str(), peer_node->GetType().c_str());
}
SetWeights(peer_node->GetOpDesc(), pair.second);
} else {
// b. create new const input node
auto const_opdesc = CreateConstOp(pair.second);
GE_CHECK_NOTNULL(const_opdesc);
auto owner_graph = node.GetOwnerComputeGraph();
if (owner_graph == nullptr) {
GELOGE(GRAPH_PARAM_INVALID, "node's graph is empty, name: %s", node.GetName().c_str());
return GRAPH_PARAM_INVALID;
}
auto const_node = owner_graph->AddNodeFront(const_opdesc);
if (node.AddLinkFrom(static_cast<uint32_t>(pair.first), const_node) != GRAPH_SUCCESS) {
GELOGE(GRAPH_FAILED, "op %s add const to input index[%d] failed", node.GetName().c_str(), pair.first);
return GRAPH_FAILED;
}
}
}
NodeUtils::UpdateIsInputConst(node);
return GRAPH_SUCCESS;
}
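Sketch of the per-index setter added above: each map key is an input index; an existing Const peer at that index is updated, otherwise a new Const node is created and linked. The wrapper name, the convolution node, and the filter tensor are assumptions for illustration only.

```cpp
#include <map>
#include "graph/utils/op_desc_utils.h"

ge::graphStatus BindFilterWeight(ge::Node &conv_node, const ge::GeTensorPtr &filter) {
  // Input 0 stays connected to the data path; only input 1 gets a const weight.
  std::map<int, ge::GeTensorPtr> weights_map{{1, filter}};
  return ge::OpDescUtils::SetWeights(conv_node, weights_map);
}
```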
OpDescPtr OpDescUtils::CreateConstOp(const GeTensorPtr &tensor_ptr) { | OpDescPtr OpDescUtils::CreateConstOp(const GeTensorPtr &tensor_ptr) { | ||||
GE_CHK_BOOL_EXEC(tensor_ptr != nullptr, return nullptr, "tensor_ptr is nullptr!"); | GE_CHK_BOOL_EXEC(tensor_ptr != nullptr, return nullptr, "tensor_ptr is nullptr!"); | ||||
shared_ptr<OpDesc> const_opdesc = ComGraphMakeShared<OpDesc>(); | shared_ptr<OpDesc> const_opdesc = ComGraphMakeShared<OpDesc>(); | ||||
@@ -51,6 +51,7 @@ include_directories(${GE_SOURCE_DIR}/inc/graph) | |||||
include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib) | include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib) | ||||
include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) | include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) | ||||
include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) | include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) | ||||
include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/toolchain) | |||||
include_directories(${CMAKE_BINARY_DIR}) | include_directories(${CMAKE_BINARY_DIR}) | ||||
include_directories(${CMAKE_BINARY_DIR}/proto/ge) | include_directories(${CMAKE_BINARY_DIR}/proto/ge) | ||||
@@ -58,6 +59,7 @@ include_directories(${CMAKE_BINARY_DIR}/proto/ge) | |||||
# need to remove dependencies on pb files later | # need to remove dependencies on pb files later | ||||
file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | ||||
"analyzer/analyzer.cc" | "analyzer/analyzer.cc" | ||||
"client/ge_prof.cc" | |||||
"client/ge_api.cc" | "client/ge_api.cc" | ||||
"common/dump/dump_manager.cc" | "common/dump/dump_manager.cc" | ||||
"common/dump/dump_properties.cc" | "common/dump/dump_properties.cc" | ||||
@@ -225,6 +227,9 @@ target_link_libraries(ge_runner | |||||
${msprof} | ${msprof} | ||||
${runtime} | ${runtime} | ||||
${resource} | ${resource} | ||||
${ascend_hal} | |||||
${adump_server} | |||||
${msprofiler} | |||||
rt | rt | ||||
dl) | dl) | ||||
@@ -235,6 +240,7 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
"common/dump/dump_properties.cc" | "common/dump/dump_properties.cc" | ||||
"common/dump/dump_manager.cc" | "common/dump/dump_manager.cc" | ||||
"common/dump/dump_op.cc" | "common/dump/dump_op.cc" | ||||
"common/dump/dump_server.cc" | |||||
"common/formats/format_transfers/*.cc" | "common/formats/format_transfers/*.cc" | ||||
"common/formats/formats.cc" | "common/formats/formats.cc" | ||||
"common/formats/utils/formats_trans_utils.cc" | "common/formats/utils/formats_trans_utils.cc" | ||||
@@ -335,6 +341,7 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
"host_kernels/unpack_kernel.cc" | "host_kernels/unpack_kernel.cc" | ||||
"host_kernels/unsqueeze_kernel.cc" | "host_kernels/unsqueeze_kernel.cc" | ||||
"hybrid/hybrid_davinci_model_stub.cc" | "hybrid/hybrid_davinci_model_stub.cc" | ||||
"hybrid/node_executor/aicpu/aicpu_ext_info.cc" | |||||
"init/gelib.cc" | "init/gelib.cc" | ||||
"ir_build/atc_ir_common.cc" | "ir_build/atc_ir_common.cc" | ||||
"ir_build/ge_ir_build.cc" | "ir_build/ge_ir_build.cc" | ||||
@@ -352,7 +359,10 @@ add_library(ge_compiler SHARED ${INFER_SRC_LIST} ${PROTO_SRCS} ${PROTO_HEADER_HD | |||||
target_compile_definitions(ge_compiler PRIVATE | target_compile_definitions(ge_compiler PRIVATE | ||||
PROTOBUF_INLINE_NOT_IN_HEADERS=0 | PROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
REUSE_MEMORY=1 | REUSE_MEMORY=1 | ||||
FMK_HOST_INFER) | |||||
FMK_HOST_INFER | |||||
FMK_SUPPORT_DUMP | |||||
COMPILE_OMG_PACKAGE | |||||
REUSE_MEMORY=1) | |||||
target_link_libraries(ge_compiler | target_link_libraries(ge_compiler | ||||
graph | graph | ||||
ge_common | ge_common | ||||
@@ -101,7 +101,7 @@ Status Analyzer::BuildJsonObject(uint64_t session_id, uint64_t graph_id) { | |||||
ge::Status Analyzer::Initialize() { | ge::Status Analyzer::Initialize() { | ||||
ClearHistoryFile(); | ClearHistoryFile(); | ||||
return CreateAnalyzerFile(); | |||||
return SUCCESS; | |||||
} | } | ||||
void Analyzer::Finalize() { | void Analyzer::Finalize() { | ||||
@@ -136,7 +136,7 @@ void Analyzer::DestroyGraphJsonObject(uint64_t session_id, uint64_t graph_id) { | |||||
} else { | } else { | ||||
auto iter1 = (iter->second).find(graph_id); | auto iter1 = (iter->second).find(graph_id); | ||||
if (iter1 == (iter->second).end()) { | if (iter1 == (iter->second).end()) { | ||||
GELOGW("can not find the graph json object by session_id[%lu] and graph_id[%lu].Do nothing", session_id, | |||||
GELOGW("Can not find the graph json object by session_id[%lu] and graph_id[%lu]. Do nothing.", session_id, | |||||
graph_id); | graph_id); | ||||
} | } | ||||
(iter->second).erase(iter1); | (iter->second).erase(iter1); | ||||
@@ -169,6 +169,10 @@ void Analyzer::ClearHistoryFile() { | |||||
} | } | ||||
ge::Status Analyzer::CreateAnalyzerFile() { | ge::Status Analyzer::CreateAnalyzerFile() { | ||||
if (is_json_file_create_) { | |||||
GELOGD("analyzer file has been created!No necessary to create again!"); | |||||
return SUCCESS; | |||||
} | |||||
GELOGD("start to create analyzer file!"); | GELOGD("start to create analyzer file!"); | ||||
// Check whether the manifest exists, if not, create it. | // Check whether the manifest exists, if not, create it. | ||||
string real_path = RealPath(kFilePath.c_str()); | string real_path = RealPath(kFilePath.c_str()); | ||||
@@ -176,18 +180,19 @@ ge::Status Analyzer::CreateAnalyzerFile() { | |||||
GELOGE(FAILED, "File path is invalid."); | GELOGE(FAILED, "File path is invalid."); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
string file = real_path + "/" + kAnalyzeFile; | |||||
GELOGD("Created analyzer file:[%s]", file.c_str()); | |||||
int fd = open(file.c_str(), O_WRONLY | O_CREAT | O_TRUNC, kFileAuthority); | |||||
std::lock_guard<std::mutex> lg(file_mutex_); | |||||
json_file_name_ = real_path + "/" + kAnalyzeFile; | |||||
GELOGD("Created analyzer file:[%s]", json_file_name_.c_str()); | |||||
int fd = open(json_file_name_.c_str(), O_WRONLY | O_CREAT | O_TRUNC, kFileAuthority); | |||||
if (fd < 0) { | if (fd < 0) { | ||||
GELOGE(INTERNAL_ERROR, "Fail to open the file: %s.", file.c_str()); | |||||
GELOGE(INTERNAL_ERROR, "Fail to open the file: %s.", json_file_name_.c_str()); | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
if (close(fd) != 0) { | if (close(fd) != 0) { | ||||
GELOGE(INTERNAL_ERROR, "Fail to close the file: %s.", file.c_str()); | |||||
GELOGE(INTERNAL_ERROR, "Fail to close the file: %s.", json_file_name_.c_str()); | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
json_file_name_ = file; | |||||
is_json_file_create_ = true; | |||||
GELOGD("success to create analyzer file[%s]!", json_file_name_.c_str()); | GELOGD("success to create analyzer file[%s]!", json_file_name_.c_str()); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -231,6 +236,12 @@ ge::Status Analyzer::DoAnalyze(DataInfo &data_info) { | |||||
GELOGE(status, "save op info failed!"); | GELOGE(status, "save op info failed!"); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
// create json file | |||||
status = CreateAnalyzerFile(); | |||||
if (status != SUCCESS) { | |||||
GELOGE(status, "create analyzer file failed!"); | |||||
return status; | |||||
} | |||||
// save data to file | // save data to file | ||||
return SaveAnalyzerDataToFile(); | return SaveAnalyzerDataToFile(); | ||||
} | } | ||||
@@ -24,6 +24,7 @@ | |||||
#include <mutex> | #include <mutex> | ||||
#include <memory> | #include <memory> | ||||
#include <fstream> | #include <fstream> | ||||
#include <atomic> | |||||
#include "external/ge/ge_api_types.h" | #include "external/ge/ge_api_types.h" | ||||
#include "graph/compute_graph.h" | #include "graph/compute_graph.h" | ||||
@@ -181,6 +182,7 @@ class Analyzer { | |||||
std::mutex file_mutex_; // protect json_file_ | std::mutex file_mutex_; // protect json_file_ | ||||
std::ofstream json_file_; | std::ofstream json_file_; | ||||
std::string json_file_name_; | std::string json_file_name_; | ||||
std::atomic_bool is_json_file_create_{false}; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // DOMI_ANALYZER_ANANLYZER_H_ | #endif // DOMI_ANALYZER_ANANLYZER_H_ |
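The Analyzer change above replaces the unconditional file creation in Initialize() with a deferred, create-once path: DoAnalyze() calls CreateAnalyzerFile(), which returns early when is_json_file_create_ is already set and otherwise creates the file under file_mutex_. Below is a minimal, self-contained sketch of that pattern; the class name and the double-checked variant are illustrative, not lifted from the PR:

```cpp
#include <atomic>
#include <fstream>
#include <mutex>
#include <string>

class LazyJsonFile {
 public:
  // Create the file on first use only; subsequent calls are cheap no-ops.
  bool EnsureCreated(const std::string &path) {
    if (created_) {                              // fast path, like the is_json_file_create_ check
      return true;
    }
    std::lock_guard<std::mutex> lock(mutex_);    // like file_mutex_ in CreateAnalyzerFile()
    if (created_) {                              // re-check under the lock (double-checked variant)
      return true;
    }
    std::ofstream file(path, std::ios::trunc);   // open + truncate, closed again on scope exit
    if (!file.is_open()) {
      return false;
    }
    created_ = true;
    return true;
  }

 private:
  std::mutex mutex_;
  std::atomic_bool created_{false};
};
```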
@@ -29,6 +29,7 @@ file(GLOB PROTO_HEADER_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | ||||
"ge_api.cc" | "ge_api.cc" | ||||
"ge_prof.cc" | |||||
) | ) | ||||
ge_protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | ge_protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | ||||
@@ -66,5 +67,8 @@ target_link_libraries(ge_client | |||||
${slog} | ${slog} | ||||
${mmpa} | ${mmpa} | ||||
${runtime} | ${runtime} | ||||
${msprof} | |||||
${msprofiler} | |||||
${ascend_hal} | |||||
rt | rt | ||||
dl) | dl) |
@@ -16,6 +16,7 @@ | |||||
#include "ge/ge_api.h" | #include "ge/ge_api.h" | ||||
#include <iostream> | #include <iostream> | ||||
#include <malloc.h> | |||||
#include "common/debug/log.h" | #include "common/debug/log.h" | ||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
#include "common/ge/datatype_util.h" | #include "common/ge/datatype_util.h" | ||||
@@ -39,7 +40,7 @@ using std::vector; | |||||
namespace { | namespace { | ||||
const int32_t kMaxStrLen = 128; | const int32_t kMaxStrLen = 128; | ||||
} | |||||
} // namespace | |||||
static bool g_ge_initialized = false; | static bool g_ge_initialized = false; | ||||
static std::mutex g_ge_release_mutex; // GEFinalize and ~Session use | static std::mutex g_ge_release_mutex; // GEFinalize and ~Session use | ||||
@@ -163,6 +164,9 @@ Status GEFinalize() { | |||||
g_ge_initialized = false; | g_ge_initialized = false; | ||||
} | } | ||||
// to avoid memory fragmentation, use malloc_trim to return freed heap memory to the system | |||||
malloc_trim(0); | |||||
GELOGT(TRACE_STOP, "GEFinalize finished"); | GELOGT(TRACE_STOP, "GEFinalize finished"); | ||||
return ret; | return ret; | ||||
} | } | ||||
@@ -0,0 +1,356 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include "ge/ge_prof.h" | |||||
#include "ge/ge_api.h" | |||||
#include "init/gelib.h" | |||||
#include "common/debug/log.h" | |||||
#include "framework/common/debug/ge_log.h" | |||||
#include "common/profiling/profiling_manager.h" | |||||
#include "graph/load/graph_loader.h" | |||||
#include "toolchain/prof_acl_api.h" | |||||
using std::map; | |||||
using std::string; | |||||
using std::vector; | |||||
namespace { | |||||
const uint32_t kMaxDeviceNum = 64; | |||||
const uint32_t kDeviceListIndex = 3; | |||||
const std::string kProfilingInit = "prof_init"; | |||||
const std::string kProfilingFinalize = "prof_finalize"; | |||||
const std::string kProfilingStart = "prof_start"; | |||||
const std::string kProfilingStop = "prof_stop"; | |||||
const std::string kDeviceNums = "devNums"; | |||||
const std::string kDeviceIdList = "devIdList"; | |||||
const std::string kAicoreMetrics = "aicoreMetrics"; | |||||
const std::map<ge::ProfilingAicoreMetrics, std::string> kProfAicoreMetricsToString = { | |||||
{ge::kAicoreArithmaticThroughput, "AICORE_ARITHMATIC_THROUGHPUT"}, | |||||
{ge::kAicorePipeline, "AICORE_PIPELINE"}, | |||||
{ge::kAicoreSynchronization, "AICORE_SYNCHRONIZATION"}, | |||||
{ge::kAicoreMemory, "AICORE_MEMORY"}, | |||||
{ge::kAicoreInternalMemory, "AICORE_INTERNAL_MEMORY"}, | |||||
{ge::kAicoreStall, "AICORE_STALL"}}; | |||||
} // namespace | |||||
static bool g_graph_prof_init_ = false; | |||||
static std::mutex g_prof_mutex_; | |||||
namespace ge { | |||||
struct aclgrphProfConfig { | |||||
ProfConfig config; | |||||
}; | |||||
Status aclgrphProfInit(const char *profiler_path, uint32_t length) { | |||||
GELOGT(TRACE_INIT, "Graph prof init start"); | |||||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||||
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); | |||||
return FAILED; | |||||
} | |||||
std::lock_guard<std::mutex> lock(g_prof_mutex_); | |||||
if (g_graph_prof_init_) { | |||||
GELOGW("Multi graph profiling initializations."); | |||||
return GE_PROF_MULTI_INIT; | |||||
} | |||||
Status ret = CheckPath(profiler_path, length); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Profiling config path is invalid."); | |||||
return ret; | |||||
} | |||||
// if command mode is set, just return | |||||
if (ProfilingManager::Instance().ProfilingOn()) { | |||||
GELOGW("Graph prof init failed, cause profiling command pattern is running."); | |||||
return GE_PROF_MODE_CONFLICT; | |||||
} | |||||
ret = ProfInit(profiler_path); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "ProfInit init fail"); | |||||
return ret; | |||||
} | |||||
GraphLoader graph_loader; | |||||
Command command; | |||||
command.cmd_params.clear(); | |||||
command.cmd_type = kProfilingInit; | |||||
command.module_index = PROF_MODEL_LOAD; | |||||
ret = graph_loader.CommandHandle(command); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Handle profiling command %s failed, config = %s", kProfilingInit.c_str(), profiler_path); | |||||
return ret; | |||||
} | |||||
if (!g_graph_prof_init_) { | |||||
g_graph_prof_init_ = true; | |||||
GELOGI("Profiling init successfully."); | |||||
} | |||||
GELOGI("Successfully execute GraphProfInit."); | |||||
return SUCCESS; | |||||
} | |||||
Status aclgrphProfFinalize() { | |||||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||||
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); | |||||
return FAILED; | |||||
} | |||||
std::lock_guard<std::mutex> lock(g_prof_mutex_); | |||||
// if command mode is set, just return | |||||
if (ProfilingManager::Instance().ProfilingOn()) { | |||||
GELOGW("Graph prof finalize failed, cause profiling command pattern is running."); | |||||
return GE_PROF_MODE_CONFLICT; | |||||
} | |||||
if (!g_graph_prof_init_) { | |||||
GELOGE(GE_PROF_NOT_INIT, "Graph profiling has not been initialized."); | |||||
return GE_PROF_NOT_INIT; | |||||
} | |||||
GraphLoader graph_loader; | |||||
Command command; | |||||
command.cmd_params.clear(); | |||||
command.cmd_type = kProfilingFinalize; | |||||
Status ret = graph_loader.CommandHandle(command); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Handle profiling command %s failed.", kProfilingFinalize.c_str()); | |||||
return ret; | |||||
} | |||||
ret = ProfFinalize(); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Finalize profiling failed, result = %d", ret); | |||||
} | |||||
if (ret == SUCCESS) { | |||||
g_graph_prof_init_ = false; | |||||
GELOGI("Successfully execute GraphProfFinalize."); | |||||
} | |||||
return ret; | |||||
} | |||||
bool TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector<string> &prof_config_params) { | |||||
prof_config_params.clear(); | |||||
prof_config_params.emplace_back(kDeviceNums); | |||||
prof_config_params.emplace_back(std::to_string(profiler_config->config.devNums)); | |||||
prof_config_params.emplace_back(kDeviceIdList); | |||||
std::string devID = ""; | |||||
if (profiler_config->config.devNums == 0) { | |||||
GELOGW("The device num is invalid."); | |||||
return false; | |||||
} | |||||
for (uint32_t i = 0; i < profiler_config->config.devNums; i++) { | |||||
devID.append(std::to_string(profiler_config->config.devIdList[i])); | |||||
if (i != profiler_config->config.devNums - 1) { | |||||
devID.append(","); | |||||
} | |||||
} | |||||
prof_config_params.push_back(devID); | |||||
prof_config_params.push_back(kAicoreMetrics); | |||||
auto iter = | |||||
kProfAicoreMetricsToString.find(static_cast<ProfilingAicoreMetrics>(profiler_config->config.aicoreMetrics)); | |||||
if (iter == kProfAicoreMetricsToString.end()) { | |||||
GELOGW("The prof aicore metrics is invalid."); | |||||
return false; | |||||
} | |||||
prof_config_params.push_back(iter->second); | |||||
return true; | |||||
} | |||||
bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { | |||||
if (deviceid_list == nullptr) { | |||||
GELOGE(PARAM_INVALID, "deviceIdList is nullptr"); | |||||
return false; | |||||
} | |||||
if (device_nums == 0 || device_nums > kMaxDeviceNum) { | |||||
GELOGE(PARAM_INVALID, "The device nums is invalid."); | |||||
return false; | |||||
} | |||||
// real device num | |||||
int32_t dev_count = 0; | |||||
rtError_t rt_err = rtGetDeviceCount(&dev_count); | |||||
if (rt_err != RT_ERROR_NONE) { | |||||
GELOGE(INTERNAL_ERROR, "Get the Device count fail."); | |||||
return false; | |||||
} | |||||
if (device_nums > static_cast<uint32_t>(dev_count)) { | |||||
GELOGE(PARAM_INVALID, "Device num(%u) is not in range 1 ~ %d.", device_nums, dev_count); | |||||
return false; | |||||
} | |||||
std::unordered_set<uint32_t> record; | |||||
for (size_t i = 0; i < device_nums; ++i) { | |||||
uint32_t dev_id = deviceid_list[i]; | |||||
if (dev_id >= static_cast<uint32_t>(dev_count)) { | |||||
GELOGE(PARAM_INVALID, "Device id %u is not in range 0 ~ %d(exclude %d)", dev_id, dev_count, dev_count); | |||||
return false; | |||||
} | |||||
if (record.count(dev_id) > 0) { | |||||
GELOGE(PARAM_INVALID, "Device id %u is duplicatedly set", dev_id); | |||||
return false; | |||||
} | |||||
record.insert(dev_id); | |||||
} | |||||
return true; | |||||
} | |||||
aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t device_nums, | |||||
ProfilingAicoreMetrics aicore_metrics, ProfAicoreEvents *aicore_events, | |||||
uint64_t data_type_config) { | |||||
if (!isProfConfigValid(deviceid_list, device_nums)) { | |||||
return nullptr; | |||||
} | |||||
aclgrphProfConfig *config = new (std::nothrow) aclgrphProfConfig(); | |||||
if (config == nullptr) { | |||||
GELOGE(INTERNAL_ERROR, "new aclgrphProfConfig fail"); | |||||
return nullptr; | |||||
} | |||||
config->config.devNums = device_nums; | |||||
if (memcpy_s(config->config.devIdList, sizeof(config->config.devIdList), deviceid_list, | |||||
device_nums * sizeof(uint32_t)) != EOK) { | |||||
GELOGE(INTERNAL_ERROR, "copy devID failed. size = %u", device_nums); | |||||
delete config; | |||||
return nullptr; | |||||
} | |||||
config->config.aicoreMetrics = static_cast<ProfAicoreMetrics>(aicore_metrics); | |||||
config->config.dataTypeConfig = data_type_config; | |||||
GELOGI("Successfully create prof config."); | |||||
return config; | |||||
} | |||||
Status aclgrphProfDestroyConfig(aclgrphProfConfig *profiler_config) { | |||||
if (profiler_config == nullptr) { | |||||
GELOGE(PARAM_INVALID, "destroy profilerConfig failed, profilerConfig must not be nullptr"); | |||||
return PARAM_INVALID; | |||||
} | |||||
delete profiler_config; | |||||
GELOGI("Successfully destroy prof config."); | |||||
return SUCCESS; | |||||
} | |||||
Status aclgrphProfStart(aclgrphProfConfig *profiler_config) { | |||||
if (profiler_config == nullptr) { | |||||
GELOGE(PARAM_INVALID, "aclgrphProfConfig is invalid."); | |||||
return FAILED; | |||||
} | |||||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||||
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); | |||||
return FAILED; | |||||
} | |||||
std::lock_guard<std::mutex> lock(g_prof_mutex_); | |||||
// if command mode is set, just return | |||||
if (ProfilingManager::Instance().ProfilingOn()) { | |||||
GELOGW("Graph prof finalize failed, cause profiling command pattern is running."); | |||||
return GE_PROF_MODE_CONFLICT; | |||||
} | |||||
if (!g_graph_prof_init_) { | |||||
GELOGE(GE_PROF_NOT_INIT, "Graph profiling has not been initialized."); | |||||
return GE_PROF_NOT_INIT; | |||||
} | |||||
Status ret = ProfStartProfiling(&profiler_config->config); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Start profiling failed, prof result = %d", ret); | |||||
return FAILED; | |||||
} | |||||
std::vector<string> prof_params; | |||||
if (!TransProfConfigToParam(profiler_config, prof_params)) { | |||||
GELOGE(PARAM_INVALID, "Transfer profilerConfig to string vector failed"); | |||||
return PARAM_INVALID; | |||||
} | |||||
GraphLoader graph_loader; | |||||
Command command; | |||||
command.cmd_params.clear(); | |||||
command.cmd_type = kProfilingStart; | |||||
command.cmd_params = prof_params; | |||||
command.module_index = profiler_config->config.dataTypeConfig; | |||||
GELOGI("Profiling will start, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), | |||||
prof_params[kDeviceListIndex].c_str(), command.module_index); | |||||
ret = graph_loader.CommandHandle(command); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Handle profiling command failed"); | |||||
return FAILED; | |||||
} | |||||
GELOGI("Successfully execute GraphProfStartProfiling."); | |||||
return SUCCESS; | |||||
} | |||||
Status aclgrphProfStop(aclgrphProfConfig *profiler_config) { | |||||
if (profiler_config == nullptr) { | |||||
GELOGE(PARAM_INVALID, "aclgrphProfConfig is invalid."); | |||||
return FAILED; | |||||
} | |||||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||||
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); | |||||
return FAILED; | |||||
} | |||||
std::lock_guard<std::mutex> lock(g_prof_mutex_); | |||||
// if command mode is set, just return | |||||
if (ProfilingManager::Instance().ProfilingOn()) { | |||||
GELOGW("Graph prof finalize failed, cause profiling command pattern is running."); | |||||
return GE_PROF_MODE_CONFLICT; | |||||
} | |||||
if (!g_graph_prof_init_) { | |||||
GELOGE(GE_PROF_NOT_INIT, "Graph profiling has not been initialized."); | |||||
return GE_PROF_NOT_INIT; | |||||
} | |||||
Status ret = ProfStopProfiling(&profiler_config->config); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Stop profiling failed, prof result = %d", ret); | |||||
return ret; | |||||
} | |||||
std::vector<string> prof_params; | |||||
if (!TransProfConfigToParam(profiler_config, prof_params)) { | |||||
GELOGE(PARAM_INVALID, "Transfer profilerConfig to string vector failed"); | |||||
return PARAM_INVALID; | |||||
} | |||||
GraphLoader graph_loader; | |||||
Command command; | |||||
command.cmd_params.clear(); | |||||
command.cmd_type = kProfilingStop; | |||||
command.cmd_params = prof_params; | |||||
command.module_index = profiler_config->config.dataTypeConfig; | |||||
GELOGI("Profiling will stop, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), | |||||
prof_params[kDeviceListIndex].c_str(), command.module_index); | |||||
ret = graph_loader.CommandHandle(command); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Handle profiling command failed"); | |||||
return FAILED; | |||||
} | |||||
GELOGI("Successfully execute GraphProfStopProfiling."); | |||||
return SUCCESS; | |||||
} | |||||
} // namespace ge |
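The new ge_prof.cc only provides the API surface; the expected call order can be read off the checks in the functions above. The following is a hedged caller-side sketch, not part of this change set: it assumes GE has already been initialized through GEInitialize(), that /tmp/ge_prof is an existing readable directory (CheckPath enforces this), and that logical device 0 exists.

```cpp
#include <string>
#include "ge/ge_prof.h"

ge::Status RunWithGraphProfiling() {
  const std::string prof_dir = "/tmp/ge_prof";  // assumed profiling output directory
  ge::Status ret = ge::aclgrphProfInit(prof_dir.c_str(), static_cast<uint32_t>(prof_dir.size()));
  if (ret != ge::SUCCESS) {
    return ret;  // e.g. GE_PROF_MODE_CONFLICT when command-line profiling is already active
  }

  uint32_t devices[1] = {0};  // profile logical device 0
  ge::aclgrphProfConfig *config = ge::aclgrphProfCreateConfig(
      devices, 1, ge::kAicorePipeline, nullptr, ge::kProfTaskTime | ge::kProfAiCoreMetrics);
  if (config == nullptr) {
    (void)ge::aclgrphProfFinalize();
    return ge::FAILED;
  }

  ret = ge::aclgrphProfStart(config);
  // ... build, load and execute graphs here; profiling data is collected in the meantime ...
  if (ret == ge::SUCCESS) {
    ret = ge::aclgrphProfStop(config);
  }

  (void)ge::aclgrphProfDestroyConfig(config);
  (void)ge::aclgrphProfFinalize();
  return ret;
}
```

Start and Stop reuse the same aclgrphProfConfig, and every entry point bails out with GE_PROF_MODE_CONFLICT while the option/command-driven ProfilingManager is active, so the two profiling modes cannot be mixed.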
@@ -4,6 +4,7 @@ LOCAL_PATH := $(call my-dir) | |||||
COMMON_LOCAL_SRC_FILES := \ | COMMON_LOCAL_SRC_FILES := \ | ||||
proto/ge_api.proto \ | proto/ge_api.proto \ | ||||
ge_api.cc \ | ge_api.cc \ | ||||
ge_prof.cc \ | |||||
COMMON_LOCAL_C_INCLUDES := \ | COMMON_LOCAL_C_INCLUDES := \ | ||||
@@ -69,7 +70,10 @@ LOCAL_SHARED_LIBRARIES := \ | |||||
libregister \ | libregister \ | ||||
libge_compiler \ | libge_compiler \ | ||||
libge_common \ | libge_common \ | ||||
libmsprof \ | |||||
stub/libascend_hal | |||||
LOCAL_STATIC_LIBRARIES := libmsprofiler | |||||
LOCAL_LDFLAGS := -lrt -ldl | LOCAL_LDFLAGS := -lrt -ldl | ||||
@@ -102,7 +106,9 @@ LOCAL_SHARED_LIBRARIES := \ | |||||
libruntime \ | libruntime \ | ||||
libge_compiler \ | libge_compiler \ | ||||
libge_common \ | libge_common \ | ||||
libmsprof | |||||
LOCAL_STATIC_LIBRARIES := libmsprofiler | |||||
LOCAL_LDFLAGS := -lrt -ldl | LOCAL_LDFLAGS := -lrt -ldl | ||||
LOCAL_CFLAGS += \ | LOCAL_CFLAGS += \ | ||||
@@ -27,6 +27,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
"context/ctx.cc" | "context/ctx.cc" | ||||
"cust_aicpu_kernel_store.cc" | "cust_aicpu_kernel_store.cc" | ||||
"debug/memory_dumper.cc" | "debug/memory_dumper.cc" | ||||
"dump/dump_properties.cc" | |||||
"fmk_error_codes.cc" | "fmk_error_codes.cc" | ||||
"formats/format_transfers/datatype_transfer.cc" | "formats/format_transfers/datatype_transfer.cc" | ||||
"formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc" | "formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc" | ||||
@@ -49,7 +49,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf | |||||
dump_properties_.ClearDumpPropertyValue(); | dump_properties_.ClearDumpPropertyValue(); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
dump_properties_.SetDumpStatus(dump_status); | |||||
dump_op_switch = dump_config.dump_op_switch; | dump_op_switch = dump_config.dump_op_switch; | ||||
dump_properties_.SetDumpOpSwitch(dump_op_switch); | |||||
if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) { | if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) { | ||||
GELOGE(PARAM_INVALID, "Dump list is invalid,dump_op_switch is %s", dump_op_switch.c_str()); | GELOGE(PARAM_INVALID, "Dump list is invalid,dump_op_switch is %s", dump_op_switch.c_str()); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
@@ -95,14 +98,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpManager::IsDumpOpen() { | |||||
std::lock_guard<std::mutex> lock(mutex_); | |||||
if (!dump_properties_.GetDumpPath().empty()) { | |||||
return true; | |||||
} | |||||
return false; | |||||
} | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const DumpProperties &DumpManager::GetDumpProperties() { | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const DumpProperties &DumpManager::GetDumpProperties() { | ||||
std::lock_guard<std::mutex> lock(mutex_); | std::lock_guard<std::mutex> lock(mutex_); | ||||
return dump_properties_; | return dump_properties_; | ||||
@@ -28,7 +28,6 @@ class DumpManager { | |||||
static DumpManager &GetInstance(); | static DumpManager &GetInstance(); | ||||
Status SetDumpConf(const DumpConfig &dump_config); | Status SetDumpConf(const DumpConfig &dump_config); | ||||
bool IsDumpOpen(); | |||||
const DumpProperties &GetDumpProperties(); | const DumpProperties &GetDumpProperties(); | ||||
void SetModelName(const std::string &model_name); | void SetModelName(const std::string &model_name); | ||||
const std::string &GetModelName(); | const std::string &GetModelName(); | ||||
@@ -16,7 +16,6 @@ | |||||
#include "common/dump/dump_op.h" | #include "common/dump/dump_op.h" | ||||
#include "aicpu/common/aicpu_task_struct.h" | |||||
#include "common/dump/dump_manager.h" | #include "common/dump/dump_manager.h" | ||||
#include "common/ge/datatype_util.h" | #include "common/ge/datatype_util.h" | ||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
@@ -28,6 +27,7 @@ | |||||
#include "proto/ge_ir.pb.h" | #include "proto/ge_ir.pb.h" | ||||
#include "proto/op_mapping_info.pb.h" | #include "proto/op_mapping_info.pb.h" | ||||
#include "runtime/mem.h" | #include "runtime/mem.h" | ||||
#include "aicpu/common/aicpu_task_struct.h" | |||||
namespace { | namespace { | ||||
const uint32_t kAicpuLoadFlag = 1; | const uint32_t kAicpuLoadFlag = 1; | ||||
@@ -172,18 +172,18 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) { | |||||
return RT_FAILED; | return RT_FAILED; | ||||
} | } | ||||
constexpr int32_t ioAddrNum = 2; | |||||
constexpr uint32_t argsSize = sizeof(aicpu::AicpuParamHead) + ioAddrNum * sizeof(uint64_t); | |||||
char args[argsSize] = {0}; | |||||
auto paramHead = reinterpret_cast<aicpu::AicpuParamHead *>(args); | |||||
paramHead->length = argsSize; | |||||
paramHead->ioAddrNum = ioAddrNum; | |||||
auto ioAddr = reinterpret_cast<uint64_t *>(args + sizeof(aicpu::AicpuParamHead)); | |||||
ioAddr[0] = reinterpret_cast<uintptr_t>(proto_dev_mem_); | |||||
ioAddr[1] = reinterpret_cast<uintptr_t>(proto_size_dev_mem_); | |||||
constexpr int32_t io_addr_num = 2; | |||||
constexpr uint32_t args_size = sizeof(aicpu::AicpuParamHead) + io_addr_num * sizeof(uint64_t); | |||||
char args[args_size] = {0}; | |||||
auto param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args); | |||||
param_head->length = args_size; | |||||
param_head->ioAddrNum = io_addr_num; | |||||
auto io_addr = reinterpret_cast<uint64_t *>(args + sizeof(aicpu::AicpuParamHead)); | |||||
io_addr[0] = reinterpret_cast<uintptr_t>(proto_dev_mem_); | |||||
io_addr[1] = reinterpret_cast<uintptr_t>(proto_size_dev_mem_); | |||||
rt_ret = rtCpuKernelLaunch(nullptr, kDumpKernelsDumpOp, | rt_ret = rtCpuKernelLaunch(nullptr, kDumpKernelsDumpOp, | ||||
1, // blockDim default 1 | 1, // blockDim default 1 | ||||
args, argsSize, | |||||
args, args_size, | |||||
nullptr, // no need smDesc | nullptr, // no need smDesc | ||||
stream_); | stream_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
@@ -31,7 +31,7 @@ | |||||
namespace { | namespace { | ||||
const std::string kEnableFlag = "1"; | const std::string kEnableFlag = "1"; | ||||
const std::string kDumpStatusOpen = "on"; | |||||
const uint32_t kAicoreOverflow = (0x1 << 0); | const uint32_t kAicoreOverflow = (0x1 << 0); | ||||
const uint32_t kAtomicOverflow = (0x1 << 1); | const uint32_t kAtomicOverflow = (0x1 << 1); | ||||
const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow); | const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow); | ||||
@@ -81,12 +81,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti | |||||
if (enable_dump_ == kEnableFlag) { | if (enable_dump_ == kEnableFlag) { | ||||
std::string dump_step; | std::string dump_step; | ||||
if (GetContext().GetOption(OPTION_EXEC_DUMP_STEP, dump_step) == GRAPH_SUCCESS) { | if (GetContext().GetOption(OPTION_EXEC_DUMP_STEP, dump_step) == GRAPH_SUCCESS) { | ||||
GELOGD("Get dump step %s successfully", dump_step.c_str()); | |||||
GELOGI("Get dump step %s successfully", dump_step.c_str()); | |||||
SetDumpStep(dump_step); | SetDumpStep(dump_step); | ||||
} | } | ||||
string dump_mode; | string dump_mode; | ||||
if (GetContext().GetOption(OPTION_EXEC_DUMP_MODE, dump_mode) == GRAPH_SUCCESS) { | if (GetContext().GetOption(OPTION_EXEC_DUMP_MODE, dump_mode) == GRAPH_SUCCESS) { | ||||
GELOGD("Get dump mode %s successfully", dump_mode.c_str()); | |||||
GELOGI("Get dump mode %s successfully", dump_mode.c_str()); | |||||
SetDumpMode(dump_mode); | SetDumpMode(dump_mode); | ||||
} | } | ||||
AddPropertyValue(DUMP_ALL_MODEL, {}); | AddPropertyValue(DUMP_ALL_MODEL, {}); | ||||
@@ -192,6 +192,37 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperti | |||||
return dump_mode_; | return dump_mode_; | ||||
} | } | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpStatus(const std::string &status) { | |||||
dump_status_ = status; | |||||
} | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperties::GetDumpStatus() const { | |||||
return dump_status_; | |||||
} | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch( | |||||
const std::string &dump_op_switch) { | |||||
dump_op_switch_ = dump_op_switch; | |||||
} | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperties::GetDumpOpSwitch() const { | |||||
return dump_op_switch_; | |||||
} | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsSingleOpNeedDump() const { | |||||
if (dump_op_switch_ == kDumpStatusOpen) { | |||||
return true; | |||||
} | |||||
return false; | |||||
} | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsDumpOpen() const { | |||||
if (enable_dump_ == kEnableFlag || dump_status_ == kDumpStatusOpen) { | |||||
return true; | |||||
} | |||||
return false; | |||||
} | |||||
void DumpProperties::CopyFrom(const DumpProperties &other) { | void DumpProperties::CopyFrom(const DumpProperties &other) { | ||||
if (&other != this) { | if (&other != this) { | ||||
enable_dump_ = other.enable_dump_; | enable_dump_ = other.enable_dump_; | ||||
@@ -61,10 +61,26 @@ class DumpProperties { | |||||
const std::string &GetDumpMode() const; | const std::string &GetDumpMode() const; | ||||
void SetDumpStatus(const std::string &status); | |||||
const std::string &GetDumpStatus() const; | |||||
void SetDumpOpSwitch(const std::string &dump_op_switch); | |||||
const std::string &GetDumpOpSwitch() const; | |||||
bool IsOpDebugOpen() const { return is_op_debug_; } | bool IsOpDebugOpen() const { return is_op_debug_; } | ||||
bool IsDumpOpen() const; | |||||
bool IsSingleOpNeedDump() const; | |||||
uint32_t GetOpDebugMode() const { return op_debug_mode_; } | uint32_t GetOpDebugMode() const { return op_debug_mode_; } | ||||
const std::string &GetEnableDump() const { return enable_dump_; } | |||||
const std::string &GetEnableDumpDebug() const { return enable_dump_debug_; } | |||||
private: | private: | ||||
void CopyFrom(const DumpProperties &other); | void CopyFrom(const DumpProperties &other); | ||||
@@ -76,6 +92,8 @@ class DumpProperties { | |||||
std::string dump_path_; | std::string dump_path_; | ||||
std::string dump_step_; | std::string dump_step_; | ||||
std::string dump_mode_; | std::string dump_mode_; | ||||
std::string dump_status_; | |||||
std::string dump_op_switch_; | |||||
std::map<std::string, std::set<std::string>> model_dump_properties_map_; | std::map<std::string, std::set<std::string>> model_dump_properties_map_; | ||||
bool is_op_debug_ = false; | bool is_op_debug_ = false; | ||||
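Since DumpManager::IsDumpOpen() is removed above and the open/closed state now lives in DumpProperties (enable_dump_ == "1" or dump_status_ == "on"), call sites presumably query the properties object instead. A minimal sketch of that, under the assumption that callers go through DumpManager::GetInstance(); the wrapper name is hypothetical:

```cpp
#include "common/dump/dump_manager.h"
#include "common/dump/dump_properties.h"

bool IsAnyDumpEnabled() {
  const ge::DumpProperties &props = ge::DumpManager::GetInstance().GetDumpProperties();
  // IsDumpOpen(): option-driven (enable_dump_ == "1") or SetDumpConf-driven (dump_status_ == "on").
  // IsSingleOpNeedDump(): dump_op_switch_ == "on", i.e. single-op dumping was requested.
  return props.IsDumpOpen() || props.IsSingleOpNeedDump();
}
```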
@@ -0,0 +1,21 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include "adx_datadump_server.h" | |||||
int AdxDataDumpServerUnInit() { return 0; } | |||||
int AdxDataDumpServerInit() { return 0; } |
@@ -34,7 +34,7 @@ std::map<ge::DataType, std::vector<ge::DataType>> g_reverse_translatable_data_ty | |||||
{ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}}, | {ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}}, | ||||
{ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}}; | {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}}; | ||||
static const std::map<ge::DataType, ge::proto::DataType> g_dump_data_type_map = { | |||||
std::map<ge::DataType, ge::proto::DataType> g_dump_data_type_map = { | |||||
// key:ge datatype,value:proto datatype | // key:ge datatype,value:proto datatype | ||||
{ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, | {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, | ||||
{ge::DT_FLOAT, ge::proto::DT_FLOAT}, | {ge::DT_FLOAT, ge::proto::DT_FLOAT}, | ||||
@@ -15,14 +15,15 @@ | |||||
*/ | */ | ||||
#include "common/ge/op_tiling_manager.h" | #include "common/ge/op_tiling_manager.h" | ||||
#include "common/util/error_manager/error_manager.h" | |||||
#include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
#include <string> | #include <string> | ||||
namespace { | namespace { | ||||
const char *const kEnvName = "ASCEND_OPP_PATH"; | const char *const kEnvName = "ASCEND_OPP_PATH"; | ||||
const std::string kDefaultPath = "/usr/local/Ascend/opp"; | const std::string kDefaultPath = "/usr/local/Ascend/opp"; | ||||
const std::string kDefaultBuiltInTilingPath = "/op_impl/built-in/liboptiling.so"; | |||||
const std::string kDefaultCustomTilingPath = "/op_impl/custom/liboptiling.so"; | |||||
const std::string kDefaultBuiltInTilingPath = "/op_impl/built-in/ai_core/tbe/op_tiling/liboptiling.so"; | |||||
const std::string kDefaultCustomTilingPath = "/op_impl/custom/ai_core/tbe/op_tiling/liboptiling.so"; | |||||
const uint8_t kPrefixIndex = 9; | const uint8_t kPrefixIndex = 9; | ||||
} // namespace | } // namespace | ||||
@@ -44,7 +45,9 @@ std::string OpTilingManager::GetPath() { | |||||
if (opp_path_env != nullptr) { | if (opp_path_env != nullptr) { | ||||
char resolved_path[PATH_MAX]; | char resolved_path[PATH_MAX]; | ||||
if (realpath(opp_path_env, resolved_path) == NULL) { | if (realpath(opp_path_env, resolved_path) == NULL) { | ||||
GELOGE(PARAM_INVALID, "Failed load tiling lib as env 'ASCEND_OPP_PATH'(%s) is invalid path.", opp_path_env); | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E19024", {"env", "value", "situation"}, | |||||
{"ASCEND_OPP_PATH", opp_path_env, "loading the tiling lib"}); | |||||
GELOGE(PARAM_INVALID, "Failed load tiling lib as env 'ASCEND_OPP_PATH'[%s] is invalid path.", opp_path_env); | |||||
return std::string(); | return std::string(); | ||||
} | } | ||||
opp_path = resolved_path; | opp_path = resolved_path; | ||||
@@ -12,6 +12,7 @@ GE_COMMON_LOCAL_SRC_FILES := \ | |||||
math/fp16_math.cc \ | math/fp16_math.cc \ | ||||
debug/memory_dumper.cc \ | debug/memory_dumper.cc \ | ||||
formats/utils/formats_trans_utils.cc \ | formats/utils/formats_trans_utils.cc \ | ||||
dump/dump_properties.cc \ | |||||
formats/format_transfers/datatype_transfer.cc \ | formats/format_transfers/datatype_transfer.cc \ | ||||
formats/format_transfers/format_transfer_transpose.cc \ | formats/format_transfers/format_transfer_transpose.cc \ | ||||
formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \ | formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \ | ||||
@@ -497,7 +497,25 @@ Status ModelCacheHelper::LoadJsonFromFile(const string &file_name, Json &json) c | |||||
GELOGW("Fail to open the file: %s.", path.c_str()); | GELOGW("Fail to open the file: %s.", path.c_str()); | ||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
ifs >> json; | |||||
try { | |||||
ifs >> json; | |||||
} catch (nlohmann::detail::parse_error e) { | |||||
GELOGW("Fail to load json from file, json throw an error:%s.", e.what()); | |||||
return INTERNAL_ERROR; | |||||
} catch (nlohmann::detail::invalid_iterator e) { | |||||
GELOGW("Fail to load json from file, json throw an error:%s.", e.what()); | |||||
return INTERNAL_ERROR; | |||||
} catch (nlohmann::detail::type_error e) { | |||||
GELOGW("Fail to load json from file, json throw an error:%s.", e.what()); | |||||
return INTERNAL_ERROR; | |||||
} catch (nlohmann::detail::out_of_range e) { | |||||
GELOGW("Fail to load json from file, json throw an error:%s.", e.what()); | |||||
return INTERNAL_ERROR; | |||||
} catch (nlohmann::detail::other_error e) { | |||||
GELOGW("Fail to load json from file, json throw an error:%s.", e.what()); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
if (!json.is_object()) { | if (!json.is_object()) { | ||||
GELOGW("Fail to load the json file: %s.", path.c_str()); | GELOGW("Fail to load the json file: %s.", path.c_str()); | ||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
@@ -41,7 +41,22 @@ Status ModelHelper::SaveModelPartition(std::shared_ptr<OmFileSaveHelper> &om_fil | |||||
const uint8_t *data, size_t size) { | const uint8_t *data, size_t size) { | ||||
if (size < 1 || size > UINT32_MAX) { | if (size < 1 || size > UINT32_MAX) { | ||||
GELOGE(PARAM_INVALID, "Add model partition failed, partition size %zu invalid", size); | GELOGE(PARAM_INVALID, "Add model partition failed, partition size %zu invalid", size); | ||||
ErrorManager::GetInstance().ATCReportErrMessage("E19022"); | |||||
if (size > UINT32_MAX) { | |||||
string item = "item"; | |||||
if (type == MODEL_DEF) { | |||||
item = "model info"; | |||||
} else if (type == WEIGHTS_DATA) { | |||||
item = "weight data"; | |||||
} else if (type == TASK_INFO) { | |||||
item = "task info"; | |||||
} else if (type == TBE_KERNELS) { | |||||
item = "tbe kernels"; | |||||
} else if (type == CUST_AICPU_KERNELS) { | |||||
item = "aicpu kernels"; | |||||
} | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E19023", {"size", "item", "maxsize"}, | |||||
{std::to_string(size), item, std::to_string(UINT32_MAX)}); | |||||
} | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
if (data == nullptr) { | if (data == nullptr) { | ||||
@@ -263,7 +278,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c | |||||
} | } | ||||
Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); | Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); | ||||
if (ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_) != SUCCESS) { | |||||
if (status != SUCCESS) { | |||||
GELOGE(status, "Parse model content failed!"); | GELOGE(status, "Parse model content failed!"); | ||||
return status; | return status; | ||||
} | } | ||||
@@ -54,15 +54,19 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager &ProfilingMana | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options) { | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options) { | ||||
#ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
vector<int32_t>().swap(device_id_); | vector<int32_t>().swap(device_id_); | ||||
device_id_.push_back(options.device_id); | |||||
job_id_ = options.job_id; | job_id_ = options.job_id; | ||||
GELOGI("ProfilingManager::Init job_id:%s", job_id_.c_str()); | |||||
Status ret; | Status ret; | ||||
if (!recv_profiling_config_.empty()) { | if (!recv_profiling_config_.empty()) { | ||||
GELOGI("Profiling json config from acl:%s", recv_profiling_config_.c_str()); | GELOGI("Profiling json config from acl:%s", recv_profiling_config_.c_str()); | ||||
ret = InitFromAclCfg(recv_profiling_config_); | ret = InitFromAclCfg(recv_profiling_config_); | ||||
} else { | } else { | ||||
ret = InitFromOptions(options); | ret = InitFromOptions(options); | ||||
if (ret == SUCCESS && is_load_profiling_) { | |||||
device_id_.push_back(options.device_id); | |||||
} | |||||
} | } | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Failed to init profiling."); | GELOGE(ret, "Failed to init profiling."); | ||||
@@ -543,25 +547,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr | |||||
return; | return; | ||||
} | } | ||||
GELOGI("current logic_device_id:%d", logic_device_id); | GELOGI("current logic_device_id:%d", logic_device_id); | ||||
uint32_t phy_device_id = 0; | |||||
rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); | |||||
return; | |||||
} | |||||
GELOGI("current phy_device_id:%d", phy_device_id); | |||||
if (!is_acl_api_mode_) { | if (!is_acl_api_mode_) { | ||||
auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id); | |||||
auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id); | |||||
if (ret == device_id_.end()) { | if (ret == device_id_.end()) { | ||||
GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); | GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); | ||||
return; | return; | ||||
} | } | ||||
} | } | ||||
GELOGI("start ProfilingTaskDescInfo."); | GELOGI("start ProfilingTaskDescInfo."); | ||||
ProfilingTaskDescInfo(task_desc_info, phy_device_id); | |||||
ProfilingTaskDescInfo(task_desc_info, logic_device_id); | |||||
GELOGI("start ProfilingGraphDescInfo."); | GELOGI("start ProfilingGraphDescInfo."); | ||||
ProfilingGraphDescInfo(compute_graph_desc_info, phy_device_id); | |||||
ProfilingGraphDescInfo(compute_graph_desc_info, logic_device_id); | |||||
GELOGI("Report profiling data for GE end."); | GELOGI("Report profiling data for GE end."); | ||||
#endif | #endif | ||||
} | } | ||||
@@ -855,14 +851,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin | |||||
} | } | ||||
GELOGI("Current logic_device_id:%d", logic_device_id); | GELOGI("Current logic_device_id:%d", logic_device_id); | ||||
uint32_t phy_device_id = 0; | |||||
rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); | |||||
} | |||||
GELOGI("Current phy_device_id:%d", phy_device_id); | |||||
bool execute_model_prof_on = false; | bool execute_model_prof_on = false; | ||||
auto iter = std::find(device_id_.begin(), device_id_.end(), phy_device_id); | |||||
auto iter = std::find(device_id_.begin(), device_id_.end(), logic_device_id); | |||||
if (iter != device_id_.end()) { | if (iter != device_id_.end()) { | ||||
execute_model_prof_on = true; | execute_model_prof_on = true; | ||||
} | } | ||||
@@ -172,6 +172,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &PropertiesManag | |||||
return dump_properties_map_[session_id]; | return dump_properties_map_[session_id]; | ||||
} | } | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void PropertiesManager::AddDumpProperties( | |||||
uint64_t session_id, const DumpProperties &dump_properties) { | |||||
std::lock_guard<std::mutex> lock(mutex_); | |||||
dump_properties_map_.emplace(session_id, dump_properties); | |||||
} | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void PropertiesManager::RemoveDumpProperties(uint64_t session_id) { | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void PropertiesManager::RemoveDumpProperties(uint64_t session_id) { | ||||
std::lock_guard<std::mutex> lock(mutex_); | std::lock_guard<std::mutex> lock(mutex_); | ||||
auto iter = dump_properties_map_.find(session_id); | auto iter = dump_properties_map_.find(session_id); | ||||
@@ -23,8 +23,8 @@ | |||||
#include <string> | #include <string> | ||||
#include <vector> | #include <vector> | ||||
#include "graph/op_desc.h" | |||||
#include "common/dump/dump_properties.h" | #include "common/dump/dump_properties.h" | ||||
#include "graph/op_desc.h" | |||||
namespace ge { | namespace ge { | ||||
// Configuration property management | // Configuration property management | ||||
@@ -83,6 +83,10 @@ class PropertiesManager { | |||||
void SetPropertyDelimiter(const std::string &de); | void SetPropertyDelimiter(const std::string &de); | ||||
DumpProperties &GetDumpProperties(uint64_t session_id); | DumpProperties &GetDumpProperties(uint64_t session_id); | ||||
const map<uint64_t, DumpProperties> &GetDumpPropertiesMap() { return dump_properties_map_; } | |||||
void AddDumpProperties(uint64_t session_id, const DumpProperties &dump_properties); | |||||
void RemoveDumpProperties(uint64_t session_id); | void RemoveDumpProperties(uint64_t session_id); | ||||
private: | private: | ||||
@@ -19,16 +19,16 @@ | |||||
#include <fcntl.h> | #include <fcntl.h> | ||||
#include <sys/stat.h> | #include <sys/stat.h> | ||||
#include <unistd.h> | |||||
#include <regex.h> | #include <regex.h> | ||||
#include <unistd.h> | |||||
#include <algorithm> | #include <algorithm> | ||||
#include <climits> | #include <climits> | ||||
#include <cstdlib> | #include <cstdlib> | ||||
#include <ctime> | #include <ctime> | ||||
#include <fstream> | #include <fstream> | ||||
#include "external/ge/ge_api_error_codes.h" | |||||
#include "common/util/error_manager/error_manager.h" | #include "common/util/error_manager/error_manager.h" | ||||
#include "external/ge/ge_api_error_codes.h" | |||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
#include "framework/common/fmk_types.h" | #include "framework/common/fmk_types.h" | ||||
#include "framework/common/ge_inner_error_codes.h" | #include "framework/common/ge_inner_error_codes.h" | ||||
@@ -58,6 +58,7 @@ const int kWarningThreshold = 536870912 * 2; // 536870912 represent 512M | |||||
const int kMaxFileSizeLimit = INT_MAX; | const int kMaxFileSizeLimit = INT_MAX; | ||||
const int kMaxBuffSize = 256; | const int kMaxBuffSize = 256; | ||||
const char *const kPathValidReason = "The path can only contain 'a-z' 'A-Z' '0-9' '-' '.' '_' and chinese character"; | const char *const kPathValidReason = "The path can only contain 'a-z' 'A-Z' '0-9' '-' '.' '_' and chinese character"; | ||||
constexpr uint32_t kMaxConfigFileByte = 10 * 1024 * 1024; | |||||
} // namespace | } // namespace | ||||
namespace ge { | namespace ge { | ||||
@@ -471,7 +472,7 @@ FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::str | |||||
return true; | return true; | ||||
} | } | ||||
ret = regexec(®, str.c_str(), 0, nullptr, 0); | |||||
ret = regexec(®, str.c_str(), 0, NULL, 0); | |||||
if (ret) { | if (ret) { | ||||
regerror(ret, ®, ebuff, kMaxBuffSize); | regerror(ret, ®, ebuff, kMaxBuffSize); | ||||
GELOGE(ge::PARAM_INVALID, "regexec failed, reason: %s", ebuff); | GELOGE(ge::PARAM_INVALID, "regexec failed, reason: %s", ebuff); | ||||
@@ -482,4 +483,69 @@ FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::str | |||||
regfree(®); | regfree(®); | ||||
return true; | return true; | ||||
} | } | ||||
FMK_FUNC_HOST_VISIBILITY bool IsValidFile(const char *file_path) { | |||||
if (file_path == nullptr) { | |||||
GELOGE(PARAM_INVALID, "Config path is null."); | |||||
return false; | |||||
} | |||||
if (!CheckInputPathValid(file_path)) { | |||||
GELOGE(PARAM_INVALID, "Config path is invalid: %s", file_path); | |||||
return false; | |||||
} | |||||
// Normalize the path | |||||
std::string resolved_file_path = RealPath(file_path); | |||||
if (resolved_file_path.empty()) { | |||||
GELOGE(PARAM_INVALID, "Invalid input file path [%s], make sure that the file path is correct.", file_path); | |||||
return false; | |||||
} | |||||
mmStat_t stat = {0}; | |||||
int32_t ret = mmStatGet(resolved_file_path.c_str(), &stat); | |||||
if (ret != EN_OK) { | |||||
GELOGE(PARAM_INVALID, "cannot get config file status, which path is %s, maybe not exist, return %d, errcode %d", | |||||
resolved_file_path.c_str(), ret, mmGetErrorCode()); | |||||
return false; | |||||
} | |||||
if ((stat.st_mode & S_IFMT) != S_IFREG) { | |||||
GELOGE(PARAM_INVALID, "config file is not a common file, which path is %s, mode is %u", resolved_file_path.c_str(), | |||||
stat.st_mode); | |||||
return false; | |||||
} | |||||
if (stat.st_size > kMaxConfigFileByte) { | |||||
GELOGE(PARAM_INVALID, "config file %s size[%ld] is larger than max config file Bytes[%u]", | |||||
resolved_file_path.c_str(), stat.st_size, kMaxConfigFileByte); | |||||
return false; | |||||
} | |||||
return true; | |||||
} | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status CheckPath(const char *path, size_t length) { | |||||
if (path == nullptr) { | |||||
GELOGE(PARAM_INVALID, "Config path is invalid."); | |||||
return PARAM_INVALID; | |||||
} | |||||
if (strlen(path) != length) { | |||||
GELOGE(PARAM_INVALID, "Path is invalid or length of config path is not equal to given length."); | |||||
return PARAM_INVALID; | |||||
} | |||||
if (length == 0 || length > MMPA_MAX_PATH) { | |||||
GELOGE(PARAM_INVALID, "Length of config path is invalid."); | |||||
return PARAM_INVALID; | |||||
} | |||||
INT32 is_dir = mmIsDir(path); | |||||
if (is_dir != EN_OK) { | |||||
GELOGE(PATH_INVALID, "Open directory %s failed, maybe it is not exit or not a dir", path); | |||||
return PATH_INVALID; | |||||
} | |||||
if (mmAccess2(path, M_R_OK) != EN_OK) { | |||||
GELOGE(PATH_INVALID, "Read path[%s] failed, errmsg[%s]", path, strerror(errno)); | |||||
return PATH_INVALID; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
} // namespace ge | } // namespace ge |
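CheckPath() above is what aclgrphProfInit() uses to vet the profiling directory; its contract is that length must equal strlen(path) and that the path names an existing, readable directory. A small illustrative wrapper follows; the function name is hypothetical and placing the declaration alongside the other helpers in framework/common/util.h is an assumption:

```cpp
#include <cstring>
#include "framework/common/util.h"

ge::Status ValidateProfilingDir(const char *dir) {
  // CheckPath itself rejects nullptr, length mismatches, over-long paths,
  // non-directories and unreadable directories.
  return ge::CheckPath(dir, (dir == nullptr) ? 0 : std::strlen(dir));
}
```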
@@ -22,7 +22,7 @@ file(GLOB PROTO_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
"../../proto/insert_op.proto" | "../../proto/insert_op.proto" | ||||
"../../proto/op_mapping_info.proto" | "../../proto/op_mapping_info.proto" | ||||
"../../proto/ge_ir.proto" | "../../proto/ge_ir.proto" | ||||
"../proto/dump_task.proto" | |||||
"../../proto/dump_task.proto" | |||||
) | ) | ||||
file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | ||||
@@ -73,6 +73,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
"../graph/manager/trans_var_data_utils.cc" | "../graph/manager/trans_var_data_utils.cc" | ||||
"../graph/manager/util/debug.cc" | "../graph/manager/util/debug.cc" | ||||
"../hybrid/hybrid_davinci_model_stub.cc" | "../hybrid/hybrid_davinci_model_stub.cc" | ||||
"../hybrid/node_executor/aicpu/aicpu_ext_info.cc" | |||||
"../model/ge_model.cc" | "../model/ge_model.cc" | ||||
"../model/ge_root_model.cc" | "../model/ge_root_model.cc" | ||||
"../omm/csa_interact.cc" | "../omm/csa_interact.cc" | ||||
@@ -118,6 +119,8 @@ target_link_libraries(ge_executor | |||||
${slog} | ${slog} | ||||
${mmpa} | ${mmpa} | ||||
${msprof} | ${msprof} | ||||
${error_manager} | |||||
${ascend_hal} | |||||
rt | rt | ||||
dl) | dl) | ||||
@@ -182,6 +182,37 @@ bool IsDynamicImageSizeMatchModel(uint64_t image_height, uint64_t image_width, | |||||
GELOGE(ge::FAILED, "Dynamic resolution (%lu,%lu) can not match the gear of model.", image_height, image_width); | GELOGE(ge::FAILED, "Dynamic resolution (%lu,%lu) can not match the gear of model.", image_height, image_width); | ||||
return false; | return false; | ||||
} | } | ||||
bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims, const vector<vector<int64_t>> &batch_info) { | |||||
if (batch_info.empty()) { | |||||
GELOGE(ge::FAILED, "Dynamic batch info is empty."); | |||||
return false; | |||||
} | |||||
bool find_match = false; | |||||
for (auto resolution : batch_info) { | |||||
if (cur_dynamic_dims.size() != resolution.size()) { | |||||
GELOGE(ge::FAILED, "Cur dynamic dims param num is %zu, current resolution size is %zu.", cur_dynamic_dims.size(), | |||||
resolution.size()); | |||||
return false; | |||||
} | |||||
bool flag = true; | |||||
for (std::size_t i = 0; i < resolution.size(); ++i) { | |||||
if (cur_dynamic_dims[i] != static_cast<uint64_t>(resolution[i])) { | |||||
flag = false; | |||||
break; | |||||
} | |||||
} | |||||
if (flag) { | |||||
find_match = true; | |||||
break; | |||||
} | |||||
} | |||||
if (!find_match) { | |||||
GELOGE(ge::FAILED, "choose dynamic dims can not match the gear of model."); | |||||
} | |||||
return find_match; | |||||
} | |||||
} // namespace | } // namespace | ||||
namespace ge { | namespace ge { | ||||
@@ -347,9 +378,21 @@ Status GeExecutor::SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, u | |||||
vector<uint64_t> cur_dynamic_dims; | vector<uint64_t> cur_dynamic_dims; | ||||
Status ret = GetCurDynamicDims(model_id, dynamic_dims, cur_dynamic_dims); | Status ret = GetCurDynamicDims(model_id, dynamic_dims, cur_dynamic_dims); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "Set cur gear dynmaic dims failed"); | |||||
GELOGE(FAILED, "Set cur gear dynamic dims failed"); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
std::vector<std::vector<int64_t>> batch_info; | |||||
int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||||
ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Get dynamic input info failed."); | |||||
return ret; | |||||
} | |||||
if (!IsDynmaicDimsSizeMatchModel(cur_dynamic_dims, batch_info)) { | |||||
GELOGE(PARAM_INVALID, "The current dynamic input does not match the gear of the model."); | |||||
return PARAM_INVALID; | |||||
} | |||||
ret = GraphExecutor::SetDynamicSize(model_id, cur_dynamic_dims, static_cast<int32_t>(DYNAMIC_DIMS)); | ret = GraphExecutor::SetDynamicSize(model_id, cur_dynamic_dims, static_cast<int32_t>(DYNAMIC_DIMS)); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
@@ -410,6 +453,10 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> & | |||||
for (std::size_t i = 0; i < all_data_dims.size(); ++i) { | for (std::size_t i = 0; i < all_data_dims.size(); ++i) { | ||||
if (all_data_dims[i] < 0) { | if (all_data_dims[i] < 0) { | ||||
cur_dynamic_dims.push_back(dynamic_dims[i]); | cur_dynamic_dims.push_back(dynamic_dims[i]); | ||||
} else if (static_cast<uint64_t>(all_data_dims[i]) != dynamic_dims[i]) { | |||||
GELOGE(PARAM_INVALID, "Static dims should be same, index: %zu value: %d should be %d", i, dynamic_dims[i], | |||||
all_data_dims[i]); | |||||
return PARAM_INVALID; | |||||
} | } | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
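A worked example of the dimension filtering in GetCurDynamicDims, using made-up shapes; this is a sketch only, not part of the change.

#include <cstddef>
#include <cstdint>
#include <vector>

// Mirrors the filtering rule above: values at -1 (dynamic) positions of the model shape are
// collected, and values at static positions must match the model exactly.
std::vector<uint64_t> CollectDynamicDimsExample() {
  std::vector<int64_t> all_data_dims = {-1, 3, -1, -1};   // shape recorded in the model
  std::vector<uint64_t> dynamic_dims = {8, 3, 224, 224};  // full shape supplied by the caller
  std::vector<uint64_t> cur_dynamic_dims;
  for (std::size_t i = 0; i < all_data_dims.size(); ++i) {
    if (all_data_dims[i] < 0) {
      cur_dynamic_dims.push_back(dynamic_dims[i]);  // collected gear: {8, 224, 224}
    }
    // a mismatch on a static axis (e.g. 4 instead of 3 at index 1) fails with PARAM_INVALID above
  }
  return cur_dynamic_dims;
}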
@@ -698,6 +745,22 @@ Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo | |||||
GELOGI("GetAIPPInfo succ."); | GELOGI("GetAIPPInfo succ."); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GeExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { | |||||
GELOGI("Begin to get aipp type."); | |||||
if (!isInit_) { | |||||
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); | |||||
return GE_EXEC_NOT_INIT; | |||||
} | |||||
Status ret = GraphExecutor::GetAippType(model_id, index, type, aipp_index); | |||||
if (ret != SUCCESS) { | |||||
GELOGW("Get aipp type is not success."); | |||||
return ret; | |||||
} | |||||
GELOGI("Get aipp type success."); | |||||
return SUCCESS; | |||||
} | |||||
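A caller-side sketch of the new query. The setup (an initialized ge::GeExecutor and a loaded model id) is assumed and the GE includes are deliberately omitted; only the call shape comes from the code above.

ge::Status QueryAippTypeExample(ge::GeExecutor &executor, uint32_t model_id, uint32_t input_index) {
  ge::InputAippType aipp_type;
  size_t aipp_index = 0;
  ge::Status ret = executor.GetAippType(model_id, input_index, aipp_type, aipp_index);
  if (ret != ge::SUCCESS) {
    return ret;  // e.g. GE_EXEC_NOT_INIT when Initialize() has not been called
  }
  // aipp_type / aipp_index can now be used to decide how to feed the corresponding data input.
  return ret;
}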
Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info) { | Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info) { | ||||
GELOGI("Begin to get dynamic batch output shape info"); | GELOGI("Begin to get dynamic batch output shape info"); | ||||
if (!isInit_) { | if (!isInit_) { | ||||
@@ -60,6 +60,7 @@ local_ge_executor_src_files := \ | |||||
../single_op/task/aicpu_task_builder.cc \ | ../single_op/task/aicpu_task_builder.cc \ | ||||
../single_op/task/aicpu_kernel_task_builder.cc \ | ../single_op/task/aicpu_kernel_task_builder.cc \ | ||||
../hybrid/hybrid_davinci_model_stub.cc\ | ../hybrid/hybrid_davinci_model_stub.cc\ | ||||
../hybrid/node_executor/aicpu/aicpu_ext_info.cc \ | |||||
local_ge_executor_c_include := \ | local_ge_executor_c_include := \ | ||||
proto/insert_op.proto \ | proto/insert_op.proto \ | ||||
@@ -87,6 +88,8 @@ local_ge_executor_shared_library := \ | |||||
libgraph \ | libgraph \ | ||||
libregister \ | libregister \ | ||||
libmsprof \ | libmsprof \ | ||||
liberror_manager \ | |||||
libascend_hal | |||||
local_ge_executor_ldflags := -lrt -ldl \ | local_ge_executor_ldflags := -lrt -ldl \ | ||||
@@ -102,6 +105,7 @@ LOCAL_SRC_FILES := $(local_ge_executor_src_files) | |||||
LOCAL_C_INCLUDES := $(local_ge_executor_c_include) | LOCAL_C_INCLUDES := $(local_ge_executor_c_include) | ||||
LOCAL_SHARED_LIBRARIES := $(local_ge_executor_shared_library) | LOCAL_SHARED_LIBRARIES := $(local_ge_executor_shared_library) | ||||
LOCAL_STATIC_LIBRARIES := libmsprofiler | |||||
ifeq ($(device_os),android) | ifeq ($(device_os),android) | ||||
LOCAL_LDFLAGS += -ldl | LOCAL_LDFLAGS += -ldl | ||||
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog | LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog | ||||
@@ -137,6 +141,10 @@ LOCAL_SHARED_LIBRARIES := \ | |||||
libgraph \ | libgraph \ | ||||
libregister \ | libregister \ | ||||
libmsprof \ | libmsprof \ | ||||
liberror_manager \ | |||||
stub/libascend_hal | |||||
LOCAL_STATIC_LIBRARIES := libmsprofiler | |||||
LOCAL_LDFLAGS += $(local_ge_executor_ldflags) | LOCAL_LDFLAGS += $(local_ge_executor_ldflags) | ||||
@@ -29,6 +29,7 @@ COMMON_LOCAL_SRC_FILES := \ | |||||
common/dump/dump_properties.cc \ | common/dump/dump_properties.cc \ | ||||
common/dump/dump_manager.cc \ | common/dump/dump_manager.cc \ | ||||
common/dump/dump_op.cc \ | common/dump/dump_op.cc \ | ||||
common/dump/dump_server.cc \ | |||||
common/helper/model_cache_helper.cc \ | common/helper/model_cache_helper.cc \ | ||||
ge_local_engine/engine/host_cpu_engine.cc \ | ge_local_engine/engine/host_cpu_engine.cc \ | ||||
@@ -254,6 +255,7 @@ OME_HOST_SRC_FILES := \ | |||||
single_op/stream_resource.cc \ | single_op/stream_resource.cc \ | ||||
single_op/single_op_manager.cc \ | single_op/single_op_manager.cc \ | ||||
hybrid/hybrid_davinci_model_stub.cc \ | hybrid/hybrid_davinci_model_stub.cc \ | ||||
hybrid/node_executor/aicpu/aicpu_ext_info.cc \ | |||||
# graph/load/new_model_manager/task_info/hccl_task_info.cc | # graph/load/new_model_manager/task_info/hccl_task_info.cc | ||||
OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES) | OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES) | ||||
@@ -286,6 +288,7 @@ COMMON_LOCAL_C_INCLUDES := \ | |||||
$(TOPDIR)inc/runtime \ | $(TOPDIR)inc/runtime \ | ||||
$(TOPDIR)libc_sec/include \ | $(TOPDIR)libc_sec/include \ | ||||
$(TOPDIR)ops/built-in/op_proto/inc \ | $(TOPDIR)ops/built-in/op_proto/inc \ | ||||
$(TOPDIR)toolchain/ide/ide-daemon/external \ | |||||
third_party/json/include \ | third_party/json/include \ | ||||
third_party/protobuf/include \ | third_party/protobuf/include \ | ||||
third_party/opencv/include \ | third_party/opencv/include \ | ||||
@@ -340,6 +343,7 @@ DEVICE_LOCAL_C_INCLUDES := \ | |||||
$(TOPDIR)inc/runtime \ | $(TOPDIR)inc/runtime \ | ||||
$(TOPDIR)ops/built-in/op_proto/inc \ | $(TOPDIR)ops/built-in/op_proto/inc \ | ||||
$(TOPDIR)framework/domi \ | $(TOPDIR)framework/domi \ | ||||
$(TOPDIR)toolchain/ide/ide-daemon/external \ | |||||
third_party/json/include \ | third_party/json/include \ | ||||
third_party/protobuf/include \ | third_party/protobuf/include \ | ||||
third_party/opencv/include \ | third_party/opencv/include \ | ||||
@@ -351,7 +355,7 @@ LOCAL_MODULE := libge_compiler | |||||
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 | LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 | ||||
# from ome_inference.mk | # from ome_inference.mk | ||||
LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP | |||||
LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE | |||||
ifeq ($(DEBUG), 1) | ifeq ($(DEBUG), 1) | ||||
LOCAL_CFLAGS += -g -O0 | LOCAL_CFLAGS += -g -O0 | ||||
endif | endif | ||||
@@ -414,7 +418,7 @@ include $(CLEAR_VARS) | |||||
LOCAL_MODULE := libge_compiler | LOCAL_MODULE := libge_compiler | ||||
LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY -DNONSUPPORT_SAVE_TO_FILE | LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY -DNONSUPPORT_SAVE_TO_FILE | ||||
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 | LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP | |||||
LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE | |||||
LOCAL_CFLAGS += -DOMG_DEVICE_VERSION | LOCAL_CFLAGS += -DOMG_DEVICE_VERSION | ||||
LOCAL_CFLAGS += -O2 | LOCAL_CFLAGS += -O2 | ||||
LOCAL_MODULE_CLASS := SHARED_LIBRARIES | LOCAL_MODULE_CLASS := SHARED_LIBRARIES | ||||
@@ -42,7 +42,7 @@ include_directories(${CMAKE_BINARY_DIR}/proto/ge) | |||||
######### libge_local_engine.so ############# | ######### libge_local_engine.so ############# | ||||
add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | ||||
target_compile_definitions(ge_local_engine PRIVATE Werror) | |||||
target_compile_definitions(ge_local_engine PRIVATE Werror COMPILE_OMG_PACKAGE) | |||||
target_link_libraries(ge_local_engine | target_link_libraries(ge_local_engine | ||||
graph | graph | ||||
${PROTOBUF_LIBRARY} | ${PROTOBUF_LIBRARY} | ||||
@@ -25,40 +25,65 @@ | |||||
#include "common/ge/plugin_manager.h" | #include "common/ge/plugin_manager.h" | ||||
#include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
#include "common/fp16_t.h" | #include "common/fp16_t.h" | ||||
#include "common/math/math_util.h" | |||||
namespace { | namespace { | ||||
#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ | |||||
case (DTYPE): { \ | |||||
GeTensorPtr ge_tensor = nullptr; \ | |||||
if (need_create_flag) { \ | |||||
int64_t data_num = out_desc.GetShape().IsScalar() ? 1 : out_desc.GetShape().GetShapeSize(); \ | |||||
std::unique_ptr<TYPE[]> buf(new (std::nothrow) TYPE[data_num]()); \ | |||||
if (buf == nullptr) { \ | |||||
GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", \ | |||||
static_cast<size_t>(sizeof(TYPE) * data_num)); \ | |||||
return MEMALLOC_FAILED; \ | |||||
} \ | |||||
ge_tensor = MakeShared<GeTensor>(out_desc); \ | |||||
GE_CHECK_NOTNULL(ge_tensor); \ | |||||
GELOGI("node:%s allocate output %zu, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE)); \ | |||||
ge_tensor->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num * sizeof(TYPE)); \ | |||||
ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ | |||||
ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ | |||||
outputs.emplace_back(ge_tensor); \ | |||||
} else { \ | |||||
ge_tensor = outputs[i]; \ | |||||
GE_CHECK_NOTNULL(ge_tensor); \ | |||||
GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, \ | |||||
reinterpret_cast<const uint8_t *>(ge_tensor->GetData().data()), ge_tensor->GetData().size()); \ | |||||
} \ | |||||
auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ | |||||
auto tensor_name = op_desc->GetOutputNameByIndex(i); \ | |||||
GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \ | |||||
op_desc->GetName().c_str(), i); \ | |||||
GELOGD("Successfully inserted output tensor. node = %s, index = %zu, output name = %s, addr = %p, size = %zu", \ | |||||
op_desc->GetName().c_str(), i, tensor_name.c_str(), tensor.GetData(), tensor.GetSize()); \ | |||||
named_outputs.emplace(tensor_name, tensor); \ | |||||
break; \ | |||||
#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ | |||||
case (DTYPE): { \ | |||||
GeTensorPtr ge_tensor = nullptr; \ | |||||
if (need_create_flag) { \ | |||||
int64_t num_size = out_desc.GetShape().IsScalar() ? 1 : out_desc.GetShape().GetShapeSize(); \ | |||||
if (out_desc.GetShape().IsUnknownShape()) { \ | |||||
std::vector<std::pair<int64_t, int64_t>> range; \ | |||||
if (out_desc.GetShapeRange(range) != GRAPH_SUCCESS) { \ | |||||
GELOGE(INTERNAL_ERROR, "Get shape range failed, node:%s", op_desc->GetName().c_str()); \ | |||||
return INTERNAL_ERROR; \ | |||||
} \ | |||||
int64_t max_range_size = 1; \ | |||||
for (const auto &item : range) { \ | |||||
FMK_INT64_MULCHECK(max_range_size, item.second); \ | |||||
max_range_size *= item.second; \ | |||||
} \ | |||||
num_size = max_range_size; \ | |||||
} \ | |||||
if (num_size < 0) { \ | |||||
GELOGE(INTERNAL_ERROR, "node:%s, get size for output %zu failed, num=%lld", op_desc->GetName().c_str(), i, \ | |||||
num_size); \ | |||||
return INTERNAL_ERROR; \ | |||||
} \ | |||||
auto data_num = static_cast<uint64_t>(num_size); \ | |||||
GELOGI("node:%s allocate output %zu start, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE)); \ | |||||
std::unique_ptr<TYPE[]> buf(new (std::nothrow) TYPE[data_num]()); \ | |||||
if (buf == nullptr) { \ | |||||
GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", \ | |||||
static_cast<size_t>(sizeof(TYPE) * data_num)); \ | |||||
return MEMALLOC_FAILED; \ | |||||
} \ | |||||
ge_tensor = MakeShared<GeTensor>(out_desc); \ | |||||
GE_CHECK_NOTNULL(ge_tensor); \ | |||||
GELOGI("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, \ | |||||
data_num * sizeof(TYPE)); \ | |||||
if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \ | |||||
GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \ | |||||
return MEMALLOC_FAILED; \ | |||||
} \ | |||||
ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ | |||||
ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ | |||||
outputs.emplace_back(ge_tensor); \ | |||||
} else { \ | |||||
ge_tensor = outputs[i]; \ | |||||
GE_CHECK_NOTNULL(ge_tensor); \ | |||||
GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, \ | |||||
reinterpret_cast<const uint8_t *>(ge_tensor->GetData().data()), ge_tensor->GetData().size()); \ | |||||
} \ | |||||
auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ | |||||
auto tensor_name = op_desc->GetOutputNameByIndex(i); \ | |||||
GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \ | |||||
op_desc->GetName().c_str(), i); \ | |||||
GELOGD("Successfully inserted output tensor. node = %s, index = %zu, output name = %s, addr = %p, size = %zu", \ | |||||
op_desc->GetName().c_str(), i, tensor_name.c_str(), tensor.GetData(), tensor.GetSize()); \ | |||||
named_outputs.emplace(tensor_name, tensor); \ | |||||
break; \ | |||||
} | } | ||||
} // namespace | } // namespace | ||||
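The interesting new piece in CREATE_OUTPUT_CASE is the sizing rule for unknown shapes; a standalone sketch of that rule follows (hypothetical helper with a simplified overflow check, not part of the diff).

#include <cstdint>
#include <utility>
#include <vector>

// For an unknown shape the buffer is sized from the product of the shape-range upper bounds;
// the real macro guards the multiplication with FMK_INT64_MULCHECK. Returns -1 for an unusable
// range, mirroring the INTERNAL_ERROR path above.
int64_t MaxElementCountFromRange(const std::vector<std::pair<int64_t, int64_t>> &range) {
  int64_t max_range_size = 1;
  for (const auto &item : range) {
    if (item.second < 0 || (item.second != 0 && max_range_size > INT64_MAX / item.second)) {
      return -1;
    }
    max_range_size *= item.second;
  }
  return max_range_size;
}
// Example: range {{1, 8}, {224, 224}, {224, 224}} -> 8 * 224 * 224 elements are allocated up front.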
@@ -42,7 +42,7 @@ include ${BUILD_HOST_SHARED_LIBRARY} | |||||
include $(CLEAR_VARS) | include $(CLEAR_VARS) | ||||
LOCAL_MODULE := atclib/libge_local_engine | LOCAL_MODULE := atclib/libge_local_engine | ||||
LOCAL_CFLAGS += -Werror | LOCAL_CFLAGS += -Werror | ||||
LOCAL_CFLAGS += -std=c++11 | |||||
LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE | |||||
LOCAL_LDFLAGS := | LOCAL_LDFLAGS := | ||||
LOCAL_STATIC_LIBRARIES := | LOCAL_STATIC_LIBRARIES := | ||||
@@ -1,5 +1,5 @@ | |||||
LOCAL_PATH := $(call my-dir) | LOCAL_PATH := $(call my-dir) | ||||
include $(LOCAL_PATH)/stub/Makefile | |||||
LIBGE_LOCAL_SRC_FILES := \ | LIBGE_LOCAL_SRC_FILES := \ | ||||
proto/fusion_model.proto \ | proto/fusion_model.proto \ | ||||
proto/optimizer_priority.proto \ | proto/optimizer_priority.proto \ | ||||
@@ -296,6 +296,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
LIBCLIENT_LOCAL_SRC_FILES := \ | LIBCLIENT_LOCAL_SRC_FILES := \ | ||||
proto/ge_api.proto \ | proto/ge_api.proto \ | ||||
client/ge_api.cc \ | client/ge_api.cc \ | ||||
client/ge_prof.cc \ | |||||
RUNNER_LOCAL_C_INCLUDES := \ | RUNNER_LOCAL_C_INCLUDES := \ | ||||
$(LOCAL_PATH) ./ \ | $(LOCAL_PATH) ./ \ | ||||
@@ -312,6 +313,7 @@ RUNNER_LOCAL_C_INCLUDES := \ | |||||
$(TOPDIR)libc_sec/include \ | $(TOPDIR)libc_sec/include \ | ||||
$(TOPDIR)ops/built-in/op_proto/inc \ | $(TOPDIR)ops/built-in/op_proto/inc \ | ||||
$(TOPDIR)framework/domi/analyzer \ | $(TOPDIR)framework/domi/analyzer \ | ||||
$(TOPDIR)toolchain/ide/ide-daemon/external \ | |||||
proto/fwk_adapter.proto \ | proto/fwk_adapter.proto \ | ||||
proto/ge_ir.proto \ | proto/ge_ir.proto \ | ||||
proto/insert_op.proto \ | proto/insert_op.proto \ | ||||
@@ -353,6 +355,8 @@ LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES) | |||||
LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) | LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) | ||||
LOCAL_STATIC_LIBRARIES := libge_memory \ | LOCAL_STATIC_LIBRARIES := libge_memory \ | ||||
libadump_server \ | |||||
libmsprofiler \ | |||||
LOCAL_SHARED_LIBRARIES := \ | LOCAL_SHARED_LIBRARIES := \ | ||||
libc_sec \ | libc_sec \ | ||||
@@ -371,6 +375,7 @@ LOCAL_LDFLAGS := -lrt -ldl | |||||
LOCAL_SHARED_LIBRARIES += \ | LOCAL_SHARED_LIBRARIES += \ | ||||
libruntime \ | libruntime \ | ||||
libresource \ | libresource \ | ||||
stub/libascend_hal \ | |||||
include $(BUILD_HOST_SHARED_LIBRARY) | include $(BUILD_HOST_SHARED_LIBRARY) | ||||
@@ -388,7 +393,8 @@ endif | |||||
LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) | LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) | ||||
LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc | |||||
LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc \ | |||||
../../out/ge/lib64/stub/ge_prof.cc \ | |||||
LOCAL_SHARED_LIBRARIES := | LOCAL_SHARED_LIBRARIES := | ||||
@@ -438,6 +444,7 @@ LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES) | |||||
LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) | LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) | ||||
LOCAL_STATIC_LIBRARIES := libge_memory \ | LOCAL_STATIC_LIBRARIES := libge_memory \ | ||||
libadump_server \ | |||||
LOCAL_SHARED_LIBRARIES := \ | LOCAL_SHARED_LIBRARIES := \ | ||||
libc_sec \ | libc_sec \ | ||||
@@ -450,6 +457,7 @@ LOCAL_LDFLAGS := -lrt -ldl | |||||
LOCAL_SHARED_LIBRARIES += \ | LOCAL_SHARED_LIBRARIES += \ | ||||
libruntime \ | libruntime \ | ||||
libresource \ | libresource \ | ||||
stub/libascend_hal \ | |||||
include $(BUILD_HOST_STATIC_LIBRARY) | include $(BUILD_HOST_STATIC_LIBRARY) | ||||
@@ -469,6 +477,7 @@ LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES) | |||||
LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) | LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) | ||||
LOCAL_STATIC_LIBRARIES := libge_memory \ | LOCAL_STATIC_LIBRARIES := libge_memory \ | ||||
libadump_server \ | |||||
LOCAL_SHARED_LIBRARIES := \ | LOCAL_SHARED_LIBRARIES := \ | ||||
libc_sec \ | libc_sec \ | ||||
@@ -481,5 +490,6 @@ LOCAL_LDFLAGS := -lrt -ldl | |||||
LOCAL_SHARED_LIBRARIES += \ | LOCAL_SHARED_LIBRARIES += \ | ||||
libruntime \ | libruntime \ | ||||
libresource \ | libresource \ | ||||
libascend_hal \ | |||||
include $(BUILD_STATIC_LIBRARY) | include $(BUILD_STATIC_LIBRARY) |
@@ -136,6 +136,13 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen | |||||
bool attr) { | bool attr) { | ||||
GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | ||||
GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | ||||
auto format = tensor.GetFormat(); | |||||
auto data_type = tensor.GetDataType(); | |||||
if (format == FORMAT_RESERVED && data_type == DT_UNDEFINED) { | |||||
return SUCCESS; | |||||
} | |||||
string op_type; | string op_type; | ||||
if (!AttrUtils::GetStr(tensor, kAttrOpType, op_type) || op_type.empty()) { | if (!AttrUtils::GetStr(tensor, kAttrOpType, op_type) || op_type.empty()) { | ||||
op_type = DATA; | op_type = DATA; | ||||
@@ -521,8 +528,8 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | ||||
bool is_offline) { | bool is_offline) { | ||||
GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); | ||||
if (!inputs.empty() && (inputs.size() != op_desc->GetInputsSize())) { | |||||
GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetInputsSize()); | |||||
if (!inputs.empty() && (inputs.size() != op_desc->GetAllInputsSize())) { | |||||
GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetAllInputsSize()); | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) { | if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) { | ||||
@@ -413,7 +413,8 @@ BlockMemAssigner::BlockMemAssigner(ComputeGraphPtr compute_graph, const map<stri | |||||
life_time_(0) {} | life_time_(0) {} | ||||
BlockMemAssigner::~BlockMemAssigner() { | BlockMemAssigner::~BlockMemAssigner() { | ||||
for (MemoryBlock *memory_block : memory_blocks_) { | |||||
GELOGD("blocks_store_ size : %lu", blocks_store_.size()); | |||||
for (MemoryBlock *memory_block : blocks_store_) { | |||||
GE_DELETE_NEW_SINGLE(memory_block); | GE_DELETE_NEW_SINGLE(memory_block); | ||||
} | } | ||||
} | } | ||||
@@ -544,7 +545,7 @@ bool CanReuseBySize(const map<string, uint64_t> &reusable_block_counts, const Me | |||||
} | } | ||||
bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | ||||
uint32_t &peer_input_index) { | |||||
uint32_t &peer_input_index, bool &no_need_assign_memory) { | |||||
if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { | if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { | ||||
return false; | return false; | ||||
} | } | ||||
@@ -571,6 +572,11 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou | |||||
// If GetBool fail, is_input_continuous is false. | // If GetBool fail, is_input_continuous is false. | ||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); | (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); | ||||
GE_IF_BOOL_EXEC(is_input_continuous && CheckIsZeroMemNodeType(peer_node->GetType()), | |||||
GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index); | |||||
no_need_assign_memory = true; return false;); | |||||
if (is_input_continuous) { | if (is_input_continuous) { | ||||
if (n->GetOwnerComputeGraph() != nullptr) { | if (n->GetOwnerComputeGraph() != nullptr) { | ||||
string graph_name = n->GetOwnerComputeGraph()->GetName(); | string graph_name = n->GetOwnerComputeGraph()->GetName(); | ||||
@@ -828,6 +834,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
} | } | ||||
} | } | ||||
memory_blocks_.emplace_back(block); | memory_blocks_.emplace_back(block); | ||||
blocks_store_.emplace_back(block); | |||||
return block; | return block; | ||||
} | } | ||||
@@ -1143,8 +1150,10 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
bool out_node_set_continuous_input = false; | bool out_node_set_continuous_input = false; | ||||
bool no_need_assign_memory = ((size == 0) || CheckIsZeroMemNodeType(node->GetType())); | bool no_need_assign_memory = ((size == 0) || CheckIsZeroMemNodeType(node->GetType())); | ||||
if (!no_need_assign_memory) { | if (!no_need_assign_memory) { | ||||
out_node_set_continuous_input = IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index); | |||||
no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input); | |||||
out_node_set_continuous_input = | |||||
IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index, no_need_assign_memory); | |||||
GE_IF_BOOL_EXEC(!no_need_assign_memory, | |||||
no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input);); | |||||
} | } | ||||
no_need_assign_memory = (no_need_assign_memory || IsKnownSubgraphData(node)); | no_need_assign_memory = (no_need_assign_memory || IsKnownSubgraphData(node)); | ||||
if (no_need_assign_memory) { | if (no_need_assign_memory) { | ||||
@@ -1296,6 +1305,11 @@ void MergeBlocks(std::vector<MemoryBlock *> &dest, std::vector<MemoryBlock *> &s | |||||
return; | return; | ||||
} | } | ||||
if (dest[i] != nullptr && src[i] != nullptr) { | if (dest[i] != nullptr && src[i] != nullptr) { | ||||
if (!dest[i]->reuse_mem_ || !src[i]->reuse_mem_) { | |||||
GELOGD("Diff batch's workspace can't be reused, i: %zu, dest[i]: %s, stream: %ld, src[i]: %s, stream: %ld.", i, | |||||
dest[i]->String().c_str(), dest[i]->stream_id_, src[i]->String().c_str(), src[i]->stream_id_); | |||||
continue; | |||||
} | |||||
for (auto &symbol : src[i]->SymbolList()) { | for (auto &symbol : src[i]->SymbolList()) { | ||||
dest[i]->AddSymbol(symbol); | dest[i]->AddSymbol(symbol); | ||||
} | } | ||||
@@ -259,6 +259,7 @@ class BlockMemAssigner : public MemAssigner { | |||||
ge::ComputeGraphPtr compute_graph_; | ge::ComputeGraphPtr compute_graph_; | ||||
std::vector<MemoryBlock *> memory_blocks_; | std::vector<MemoryBlock *> memory_blocks_; | ||||
std::vector<MemoryBlock *> blocks_store_; | |||||
std::vector<NodeTypeIndex> zero_memory_list_; | std::vector<NodeTypeIndex> zero_memory_list_; | ||||
@@ -357,7 +358,7 @@ class BlockMemAssigner : public MemAssigner { | |||||
bool IsZeroCopyBlock(const NodePtr &node, bool continuous); | bool IsZeroCopyBlock(const NodePtr &node, bool continuous); | ||||
bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | ||||
uint32_t &peer_input_index); | |||||
uint32_t &peer_input_index, bool &no_need_assign_memory); | |||||
/// | /// | ||||
/// @ingroup GE | /// @ingroup GE | ||||
@@ -39,6 +39,33 @@ const size_t kVirtualInputNodeOutputSize = 1; | |||||
const size_t kVirtualOutputNodeInputSize = 1; | const size_t kVirtualOutputNodeInputSize = 1; | ||||
const size_t kVirtualNodeDataIndex = 0; | const size_t kVirtualNodeDataIndex = 0; | ||||
const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_"; | const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_"; | ||||
int64_t GetSymbolOutputOffset(const std::map<std::string, std::string> &anchor_to_symbol, | |||||
const std::map<std::string, std::list<ge::NodeIndexIO>> &symbol_to_anchors, | |||||
const ge::NodePtr &node, const uint32_t i) { | |||||
ge::NodeIndexIO cur_node_index_io(node, i, ge::kOut); | |||||
auto iter1 = anchor_to_symbol.find(cur_node_index_io.ToString()); | |||||
if (iter1 == anchor_to_symbol.end()) { | |||||
return ge::kInvalidOffset; | |||||
} | |||||
auto out_symbol = iter1->second; | |||||
auto iter2 = symbol_to_anchors.find(out_symbol); | |||||
if (iter2 == symbol_to_anchors.end()) { | |||||
return ge::kInvalidOffset; | |||||
} | |||||
for (const auto &node_index_io : iter2->second) { | |||||
if (node_index_io.value_ == out_symbol) { | |||||
vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset(); | |||||
vector<int64_t> symbol_output_list = node_index_io.node_->GetOpDesc()->GetOutputOffset(); | |||||
if (node_index_io.index_ >= symbol_output_list.size()) { | |||||
return ge::kInvalidOffset; | |||||
} | |||||
GELOGD("Node %s %uth output offset is %ld, Symbol %s output offset is %ld.", node->GetName().c_str(), i, | |||||
output_list[i], iter2->first.c_str(), symbol_output_list.at(node_index_io.index_)); | |||||
return symbol_output_list.at(node_index_io.index_); | |||||
} | |||||
} | |||||
return ge::kInvalidOffset; | |||||
} | |||||
} // namespace | } // namespace | ||||
namespace ge { | namespace ge { | ||||
Status VariableMemoryAssigner::Assign() { | Status VariableMemoryAssigner::Assign() { | ||||
@@ -227,7 +254,10 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, size_t &mem_offse | |||||
if (mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) { | if (mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) { | ||||
GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", mem_offset, | GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", mem_offset, | ||||
VarManager::Instance(session_id)->GetGraphMemoryMaxSize()); | VarManager::Instance(session_id)->GetGraphMemoryMaxSize()); | ||||
ErrorManager::GetInstance().ATCReportErrMessage("E19022"); | |||||
ErrorManager::GetInstance().ATCReportErrMessage( | |||||
"E19022", {"size", "item", "maxsize"}, | |||||
{std::to_string(mem_offset), "featuremap", | |||||
std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())}); | |||||
return ge::FAILED; | return ge::FAILED; | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -292,11 +322,19 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||||
GELOGE(ge::FAILED, | GELOGE(ge::FAILED, | ||||
"There is an atomic conflict between the current node and the peer out node, not supported!"); | "There is an atomic conflict between the current node and the peer out node, not supported!"); | ||||
return ge::FAILED; | return ge::FAILED; | ||||
} else if (is_loop_graph) { | |||||
GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, mem_clean_start)); | |||||
} else { | |||||
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {mem_clean_start}, {mem_clean_size}), | |||||
"SetAtomicCleanAttr failed."); | |||||
} | |||||
const auto &in_control_anchor = node->GetInControlAnchor(); | |||||
GE_CHECK_NOTNULL(in_control_anchor); | |||||
for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { | |||||
auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); | |||||
if (peer_out_node->GetType() == ATOMICADDRCLEAN) { | |||||
ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str()); | |||||
return ret; | |||||
} | |||||
} | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -810,68 +848,37 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePt | |||||
} | } | ||||
Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | ||||
GE_CHECK_NOTNULL(compute_graph_); | |||||
// Atomic op memory start addr | |||||
int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | |||||
GELOGI("Begin to reAssign atomic memory, atomic initial address mem_offset = %zu!", memory_offset_[0].mem_offset_); | |||||
vector<NodePtr> connect_netoutput_nodes; | |||||
for (auto &node : compute_graph_->GetAllNodes()) { | |||||
auto node_op_desc = node->GetOpDesc(); | |||||
if (node_op_desc == nullptr) { | |||||
continue; | |||||
} | |||||
bool is_atomic = false; | |||||
// If GetBool fail, is_atomic is false. | |||||
(void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); | |||||
if (!is_atomic) { | |||||
continue; | |||||
} | |||||
bool is_ref = false; | |||||
// If GetBool fail, is_ref is false. | |||||
(void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_REFERENCE, is_ref); | |||||
if (is_ref) { | |||||
GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and ref attribute.", | |||||
node_op_desc->GetName().c_str()); | |||||
return ge::PARAM_INVALID; | |||||
} | |||||
vector<int> is_connect_netoutput; | |||||
// If GetBool fail, attr is_connect_netoutput is an empty vector. | |||||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput); | |||||
if (!is_connect_netoutput.empty()) { | |||||
connect_netoutput_nodes.emplace_back(node); | |||||
continue; | |||||
} | |||||
map<NodePtr, vector<NodePtr>> normal_atomic_and_clean_nodes_map; | |||||
vector<NodePtr> connecting_output_atomic_nodes; | |||||
Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes); | |||||
if (status != SUCCESS) { | |||||
GELOGE(status, "Failed to filter atomic nodes for memory assignment."); | |||||
return status; | |||||
} | |||||
// Atomic op memory start addr of loop graph | |||||
int64_t loop_graph_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | |||||
vector<int64_t> mem_offset_end; | |||||
if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) { | |||||
GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str()); | |||||
return FAILED; | |||||
} | |||||
for (auto &iter : normal_atomic_and_clean_nodes_map) { | |||||
int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | |||||
GELOGD("Begin to reAssign atomic memory, atomic address memory start = %ld", atomic_mem_start); | |||||
/// In networks with loop op, atomic op uses atomic_addr_clean op independently, | |||||
/// so we need to set the attr separately. | |||||
if (is_loop_graph) { | |||||
GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, loop_graph_atomic_mem_start)); | |||||
for (auto &atomic_node : iter.second) { | |||||
vector<int64_t> mem_offset_end; | |||||
status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end); | |||||
if (status != SUCCESS) { | |||||
GELOGE(status, "Assign atomic output and workspace memory failed, node name is %s.", | |||||
atomic_node->GetName().c_str()); | |||||
return status; | |||||
} | |||||
} | } | ||||
} | |||||
// In networks without loop op, the same atomic addr clean op is used for atomic op | |||||
if (!is_loop_graph) { | |||||
// Set the address attr of atomic clean operator | |||||
int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; | |||||
if (atomic_mem_size != 0) { | |||||
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {atomic_mem_start}, {atomic_mem_size}), | |||||
"SetAtomicCleanAttr failed."); | |||||
int64_t atomic_mem_size = static_cast<int64_t>(memory_offset_[0].mem_offset_) - atomic_mem_start; | |||||
status = SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}); | |||||
if (status != SUCCESS) { | |||||
GELOGE(status, "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); | |||||
return status; | |||||
} | } | ||||
} | } | ||||
if (AssignConnectNetOutputAtomicMemory(connect_netoutput_nodes) != SUCCESS) { | |||||
if (AssignConnectNetOutputAtomicMemory(connecting_output_atomic_nodes) != SUCCESS) { | |||||
GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput."); | GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput."); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -879,6 +886,55 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(map<NodePtr, vector<NodePtr>> &normal_atomic_nodes_map, | |||||
vector<NodePtr> &connecting_output_atomic_nodes) { | |||||
GE_CHECK_NOTNULL(compute_graph_); | |||||
for (const auto &node : compute_graph_->GetAllNodes()) { | |||||
if (node->GetType() == ATOMICADDRCLEAN) { | |||||
vector<NodePtr> tmp_normal_atomic_nodes; | |||||
const auto &out_control_anchor = node->GetOutControlAnchor(); | |||||
GE_CHECK_NOTNULL(out_control_anchor); | |||||
for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) { | |||||
if (peer_in_control_anchor != nullptr) { | |||||
auto peer_in_node = peer_in_control_anchor->GetOwnerNode(); | |||||
auto peer_in_node_desc = peer_in_node->GetOpDesc(); | |||||
if (peer_in_node_desc != nullptr) { | |||||
bool is_atomic_node = false; | |||||
// If GetBool fail, is_atomic_node is false. | |||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node); | |||||
if (is_atomic_node) { | |||||
bool is_reference = false; | |||||
// If GetBool fail, is_reference is false. | |||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference); | |||||
if (is_reference) { | |||||
GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and is_reference attribute.", | |||||
peer_in_node_desc->GetName().c_str()); | |||||
return ge::PARAM_INVALID; | |||||
} | |||||
vector<int> is_connecting_output; | |||||
// If GetBool fail, attr is_connecting_output is an empty vector. | |||||
(void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output); | |||||
if (is_connecting_output.empty()) { | |||||
tmp_normal_atomic_nodes.emplace_back(peer_in_node); | |||||
continue; | |||||
} | |||||
connecting_output_atomic_nodes.emplace_back(peer_in_node); | |||||
tmp_normal_atomic_nodes.clear(); | |||||
break; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
if (!tmp_normal_atomic_nodes.empty()) { | |||||
normal_atomic_nodes_map[node] = tmp_normal_atomic_nodes; | |||||
} | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
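As a reading aid, a toy picture of what FilterAtomicNodesForMemoryAssign produces; the node names are invented and std::string stands in for NodePtr.

#include <map>
#include <string>
#include <vector>

// Each AtomicAddrClean node is keyed to the atomic nodes hanging off its out-control edges;
// atomic nodes that also connect to NetOutput are split out and assigned separately.
void FilterResultExample() {
  std::map<std::string, std::vector<std::string>> normal_atomic_and_clean_nodes_map = {
      {"atomic_addr_clean_0", {"conv_atomic_1", "matmul_atomic_2"}},
      {"atomic_addr_clean_1", {"reduce_atomic_3"}},
  };
  std::vector<std::string> connecting_output_atomic_nodes = {"netoutput_atomic_4"};
  // ReAssignAtomicMemory then lays out each group contiguously and writes the resulting
  // [start, size] pair back onto that group's clean node via SetAtomicCleanAttr.
  (void)normal_atomic_and_clean_nodes_map;
  (void)connecting_output_atomic_nodes;
}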
Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node, | Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node, | ||||
vector<int64_t> &mem_offset_end) { | vector<int64_t> &mem_offset_end) { | ||||
auto node_op_desc = node->GetOpDesc(); | auto node_op_desc = node->GetOpDesc(); | ||||
@@ -908,6 +964,8 @@ Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodeP | |||||
GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str()); | GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str()); | ||||
return ret; | return ret; | ||||
} | } | ||||
} else { | |||||
GELOGW("Current atomic node %s does not have attr ATOMIC_WORKSPACE_INFO.", node->GetName().c_str()); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -1186,6 +1244,12 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt | |||||
} | } | ||||
Status GraphMemoryAssigner::CheckOffset() { | Status GraphMemoryAssigner::CheckOffset() { | ||||
std::map<std::string, std::string> anchor_to_symbol; | |||||
std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors; | |||||
if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) { | |||||
GELOGE(FAILED, "Get ref-mapping for graph %s failed.", compute_graph_->GetName().c_str()); | |||||
return FAILED; | |||||
} | |||||
for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { | for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { | ||||
GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset(); | vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset(); | ||||
@@ -1195,13 +1259,26 @@ Status GraphMemoryAssigner::CheckOffset() { | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
} | } | ||||
bool need_update_output = false; | |||||
vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset(); | vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset(); | ||||
for (auto output : output_list) { | |||||
if (output == ge::kInvalidOffset) { | |||||
for (uint32_t i = 0; i < output_list.size(); ++i) { | |||||
if (output_list[i] == ge::kInvalidOffset) { | |||||
GELOGE(FAILED, "Invalid offset in node: %s output: %ld.", node->GetName().c_str(), ge::kInvalidOffset); | GELOGE(FAILED, "Invalid offset in node: %s output: %ld.", node->GetName().c_str(), ge::kInvalidOffset); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
if (node->GetType() == IDENTITY || node->GetType() == READVARIABLEOP) { | |||||
auto symbol_offset = GetSymbolOutputOffset(anchor_to_symbol, symbol_to_anchors, node, i); | |||||
if (symbol_offset != ge::kInvalidOffset && output_list[i] != symbol_offset) { | |||||
output_list[i] = symbol_offset; | |||||
need_update_output = true; | |||||
} | |||||
} | |||||
} | } | ||||
if (need_update_output) { | |||||
node->GetOpDesc()->SetOutputOffset(output_list); | |||||
} | |||||
vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace(); | vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace(); | ||||
for (auto workspace : workspace_list) { | for (auto workspace : workspace_list) { | ||||
if (workspace == ge::kInvalidOffset) { | if (workspace == ge::kInvalidOffset) { | ||||
@@ -1280,6 +1357,7 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< | |||||
vector<int64_t> memory_type; | vector<int64_t> memory_type; | ||||
auto tmp_op_desc = node->GetOpDesc(); | auto tmp_op_desc = node->GetOpDesc(); | ||||
origin_input_list = tmp_op_desc->GetInputOffset(); | origin_input_list = tmp_op_desc->GetInputOffset(); | ||||
int64_t valid_input_index = 0; | |||||
bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type); | bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type); | ||||
for (const auto &anchor : node->GetAllInDataAnchors()) { | for (const auto &anchor : node->GetAllInDataAnchors()) { | ||||
vector<int64_t> output_list; | vector<int64_t> output_list; | ||||
@@ -1293,8 +1371,9 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< | |||||
auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc(); | auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(last_peer_out_op_desc); | GE_CHECK_NOTNULL(last_peer_out_op_desc); | ||||
output_list = last_peer_out_op_desc->GetOutputOffset(); | output_list = last_peer_out_op_desc->GetOutputOffset(); | ||||
if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) { | |||||
auto input_index = anchor->GetIdx(); | |||||
auto out_index = static_cast<unsigned long>(peer_out_anchor->GetIdx()); | |||||
if (output_list.size() > static_cast<size_t>(out_index)) { | |||||
int64_t input_offset = output_list.at(out_index); | |||||
if (has_mem_type_attr) { | if (has_mem_type_attr) { | ||||
auto input_size = tmp_op_desc->GetInputsSize(); | auto input_size = tmp_op_desc->GetInputsSize(); | ||||
auto ori_input_offset_list_size = origin_input_list.size(); | auto ori_input_offset_list_size = origin_input_list.size(); | ||||
@@ -1308,26 +1387,21 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< | |||||
} | } | ||||
// not hbm keep orignal inputoffest | // not hbm keep orignal inputoffest | ||||
// hbm inputoffset = original inputoffset + outputoffset | // hbm inputoffset = original inputoffset + outputoffset | ||||
input_list.emplace_back(memory_type[input_index] == RT_MEMORY_L1 | |||||
? origin_input_list[input_index] | |||||
: origin_input_list[input_index] + output_list.at(peer_out_anchor->GetIdx())); | |||||
GELOGI("fuison: node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", | |||||
tmp_op_desc->GetName().c_str(), input_index, | |||||
peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), | |||||
input_list.back()); | |||||
} else { | |||||
int64_t output_offset = output_list.at(peer_out_anchor->GetIdx()); | |||||
const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode()); | |||||
if (in_node->GetType() == CONSTANT) { | |||||
GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(input_index); | |||||
GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, output_offset)); | |||||
} | |||||
GELOGI("node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", tmp_op_desc->GetName().c_str(), | |||||
input_index, peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), | |||||
output_offset); | |||||
input_list.emplace_back(output_offset); | |||||
input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1 | |||||
? origin_input_list[valid_input_index] | |||||
: origin_input_list[valid_input_index] + output_list.at(out_index)); | |||||
} | } | ||||
const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode()); | |||||
if (in_node->GetType() == CONSTANT) { | |||||
GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast<uint32_t>(anchor->GetIdx())); | |||||
GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset)); | |||||
} | |||||
GELOGI("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", | |||||
has_mem_type_attr == true ? "Fusion" : "", tmp_op_desc->GetName().c_str(), valid_input_index, | |||||
peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), out_index, input_offset); | |||||
input_list.emplace_back(input_offset); | |||||
valid_input_index++; | |||||
} | } | ||||
} | } | ||||
return ge::SUCCESS; | return ge::SUCCESS; | ||||
@@ -1422,83 +1496,49 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start) { | |||||
// set the address attr of atomic clean operator for loop graph | |||||
int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; | |||||
GELOGI("SetLoopGraphAtomicAttr beign, atomic_addr_clean start size is %ld, mem_size is %ld, mem_offset is %zu.", | |||||
atomic_mem_start, atomic_mem_size, memory_offset_[0].mem_offset_); | |||||
const auto &in_control_anchor = node->GetInControlAnchor(); | |||||
if (atomic_mem_size != 0 && in_control_anchor != nullptr) { | |||||
for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { | |||||
if (peer_out_control_anchor == nullptr) { | |||||
continue; | |||||
} | |||||
auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); | |||||
auto peer_out_node_desc = peer_out_node->GetOpDesc(); | |||||
if (peer_out_node_desc == nullptr) { | |||||
continue; | |||||
} | |||||
GELOGD("SetLoopGraphAtomicAttr, node is %s, op type is %s.", peer_out_node_desc->GetName().c_str(), | |||||
peer_out_node_desc->GetType().c_str()); | |||||
if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { | |||||
GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(peer_out_node, {atomic_mem_start}, {atomic_mem_size}), | |||||
GELOGE(FAILED, "SetAtomicCleanAttr failed."); | |||||
return FAILED); | |||||
} | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, const vector<int64_t> &atomic_mem_start, | |||||
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector<int64_t> &atomic_mem_start, | |||||
const vector<int64_t> &atomic_mem_size) { | const vector<int64_t> &atomic_mem_size) { | ||||
for (ge::NodePtr &node : compute_graph_->GetAllNodes()) { | |||||
auto node_op_desc = node->GetOpDesc(); | |||||
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | |||||
if (((n != nullptr) && (node->GetName() == n->GetName())) || | |||||
((n == nullptr) && (node_op_desc->GetType() == ATOMICADDRCLEAN))) { | |||||
vector<int64_t> workspace_vector = node_op_desc->GetWorkspace(); | |||||
vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes(); | |||||
workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||||
workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||||
node_op_desc->SetWorkspace(workspace_vector); | |||||
node_op_desc->SetWorkspaceBytes(workspace_byte_vector); | |||||
std::vector<int64_t> mem_start_vector; | |||||
// If GetListInt fail, mem_start_vector is empty. | |||||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); | |||||
mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), | |||||
GELOGE(FAILED, "SetListInt failed."); | |||||
return FAILED); | |||||
std::vector<int64_t> mem_size_vector; | |||||
// If GetListInt fail, mem_size_vector is empty. | |||||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); | |||||
mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), | |||||
GELOGE(FAILED, "SetListInt failed."); | |||||
return FAILED); | |||||
std::stringstream ss; | |||||
for (auto iter : atomic_mem_start) { | |||||
ss << iter << " "; | |||||
} | |||||
string atomic_mem_start_str = ss.str(); | |||||
ss.clear(); | |||||
ss.str(""); | |||||
for (auto iter : atomic_mem_size) { | |||||
ss << iter << " "; | |||||
} | |||||
string atomic_mem_size_str = ss.str(); | |||||
GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]", | |||||
node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), | |||||
atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId()); | |||||
auto node_op_desc = node->GetOpDesc(); | |||||
if (node_op_desc != nullptr) { | |||||
GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str()); | |||||
vector<int64_t> workspace_vector = node_op_desc->GetWorkspace(); | |||||
vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes(); | |||||
workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||||
workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||||
node_op_desc->SetWorkspace(workspace_vector); | |||||
node_op_desc->SetWorkspaceBytes(workspace_byte_vector); | |||||
std::vector<int64_t> mem_start_vector; | |||||
// If GetListInt fail, mem_start_vector is empty. | |||||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); | |||||
mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), | |||||
GELOGE(FAILED, "SetListInt failed."); | |||||
return FAILED); | |||||
std::vector<int64_t> mem_size_vector; | |||||
// If GetListInt fail, mem_size_vector is empty. | |||||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); | |||||
mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), | |||||
GELOGE(FAILED, "SetListInt failed."); | |||||
return FAILED); | |||||
std::stringstream ss; | |||||
for (auto iter : atomic_mem_start) { | |||||
ss << iter << " "; | |||||
} | } | ||||
string atomic_mem_start_str = ss.str(); | |||||
ss.clear(); | |||||
ss.str(""); | |||||
for (auto iter : atomic_mem_size) { | |||||
ss << iter << " "; | |||||
} | |||||
string atomic_mem_size_str = ss.str(); | |||||
GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]", | |||||
node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), | |||||
atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId()); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -135,6 +135,9 @@ class GraphMemoryAssigner { | |||||
ge::Status ReAssignAtomicMemory(bool is_loop_graph); | ge::Status ReAssignAtomicMemory(bool is_loop_graph); | ||||
ge::Status FilterAtomicNodesForMemoryAssign(std::map<NodePtr, vector<NodePtr>> &normal_atomic_nodes_map, | |||||
std::vector<NodePtr> &connecting_output_atomic_nodes); | |||||
ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | ||||
int64_t &continuous_mem_size); | int64_t &continuous_mem_size); | ||||
@@ -165,16 +168,12 @@ class GraphMemoryAssigner { | |||||
ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, | ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, | ||||
const std::vector<int64_t> &mem_offset_end); | const std::vector<int64_t> &mem_offset_end); | ||||
/// | |||||
/// @brief set loop graph atomic attr | |||||
/// @param node, atomic memory assignment start offset | |||||
/// @param atomic_mem_start: atomic op memory start address | |||||
/// | |||||
ge::Status SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start); | |||||
ge::Status SetAtomicCleanAttr(const ge::NodePtr &n, const std::vector<int64_t> &atomic_mem_start, | |||||
ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector<int64_t> &atomic_mem_start, | |||||
const std::vector<int64_t> &atomic_mem_size); | const std::vector<int64_t> &atomic_mem_size); | ||||
ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); | |||||
void AlignMemOffset(const int64_t &mem_align_size); | void AlignMemOffset(const int64_t &mem_align_size); | ||||
ge::Status UpdateOpInputOffset(const NodePtr &node, vector<int64_t> &input_list) const; | ge::Status UpdateOpInputOffset(const NodePtr &node, vector<int64_t> &input_list) const; | ||||
@@ -266,6 +266,14 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
if (is_unknown_shape) { | if (is_unknown_shape) { | ||||
GE_CHK_STATUS_RET(SetUnknownShapeStream(run_context, stream), "Set unknown shape stream failed."); | GE_CHK_STATUS_RET(SetUnknownShapeStream(run_context, stream), "Set unknown shape stream failed."); | ||||
} | } | ||||
std::function<void()> callback = [&]() { | |||||
if (is_unknown_shape) { | |||||
if (DestroyUnknownShapeStream(run_context, stream) != SUCCESS) { | |||||
GELOGE(FAILED, "Destory unknown shape stream failed."); | |||||
} | |||||
} | |||||
}; | |||||
GE_MAKE_GUARD(release, callback); | |||||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | ||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
@@ -352,9 +360,6 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, | op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, | ||||
task_list_size_after - task_list_size_before); | task_list_size_after - task_list_size_before); | ||||
} | } | ||||
if (is_unknown_shape) { | |||||
GE_CHK_STATUS_RET(DestroyUnknownShapeStream(run_context, stream), "Destory unknown shape stream failed."); | |||||
} | |||||
GE_TIMESTAMP_CALLNUM_EVENT_END(GenerateTask, "GraphBuild::GenerateTask"); | GE_TIMESTAMP_CALLNUM_EVENT_END(GenerateTask, "GraphBuild::GenerateTask"); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -532,6 +537,9 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { | |||||
(void)ge_lib->DNNEngineManagerObj().GetDNNEngineName(node); | (void)ge_lib->DNNEngineManagerObj().GetDNNEngineName(node); | ||||
} | } | ||||
(void)op_desc->DelAttr(kIsFirstNode); | |||||
(void)op_desc->DelAttr(kIsLastNode); | |||||
all_stream_ops[op_desc->GetStreamId()].emplace_back(op_desc); | all_stream_ops[op_desc->GetStreamId()].emplace_back(op_desc); | ||||
} | } | ||||
@@ -645,8 +653,6 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
vector<uint32_t> &all_reduce_nodes) const { | vector<uint32_t> &all_reduce_nodes) const { | ||||
GELOGI("Start AutoFindBpOpIndex"); | GELOGI("Start AutoFindBpOpIndex"); | ||||
NodePtr bp_node = nullptr; | NodePtr bp_node = nullptr; | ||||
uint32_t last_bp = 0; | |||||
uint32_t iter_end = 0; | |||||
uint32_t current_idx = 0; | uint32_t current_idx = 0; | ||||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | ||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
@@ -662,20 +668,40 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
all_reduce_nodes.emplace_back(current_idx); | all_reduce_nodes.emplace_back(current_idx); | ||||
GELOGI("Allreduce name %s, idx %u", op_desc->GetName().c_str(), current_idx); | GELOGI("Allreduce name %s, idx %u", op_desc->GetName().c_str(), current_idx); | ||||
} | } | ||||
if (op_desc->GetType() == NETOUTPUT) { | |||||
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { | |||||
if (bp_node == nullptr) { | if (bp_node == nullptr) { | ||||
bp_node = node; | bp_node = node; | ||||
} | } | ||||
iter_end = current_idx; | |||||
GELOGI("Iter end name %s, idx %u", op_desc->GetName().c_str(), iter_end); | |||||
} | |||||
if (graph->GetNeedIteration()) { | |||||
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") { | |||||
profiling_point.end_index.insert(current_idx); | |||||
GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive", | |||||
op_desc->GetName().c_str(), current_idx); | |||||
} | |||||
if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGN) { | |||||
profiling_point.end_index.insert(current_idx); | |||||
GELOGI("Iter end name %s, idx %u, from FlowCtrl_LoopCond_ASSIGN", op_desc->GetName().c_str(), current_idx); | |||||
} | |||||
} else { | |||||
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { | |||||
profiling_point.end_index.insert(current_idx); | |||||
GELOGI("Iter end name %s, idx %u, from NETOUTPUT", op_desc->GetName().c_str(), current_idx); | |||||
} | |||||
} | } | ||||
} | } | ||||
profiling_point.end_index = iter_end; | |||||
if (bp_node == nullptr) { | if (bp_node == nullptr) { | ||||
GELOGW("not find bp_node."); | GELOGW("not find bp_node."); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
profiling_point.bp_index = FindLastBpFromBpNode(graph, bp_node); | |||||
return SUCCESS; | |||||
} | |||||
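The profiling bookkeeping above now records several possible iteration-end ops instead of a single index; a guessed sketch of the implied ProfilingPoint shape follows (illustration only, not the real GE definition).

#include <cstdint>
#include <set>

// end_index is used as a container (end_index.insert(current_idx)) so that iteration-control
// graphs can mark both the StreamActive and the FlowCtrl assign nodes, while plain graphs mark
// NetOUTPUT only; fp_index/bp_index stay single indices, as in the bp handling above.
struct ProfilingPointSketch {
  uint32_t fp_index = 0;
  uint32_t bp_index = 0;
  std::set<uint32_t> end_index;
};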
uint32_t TaskGenerator::FindLastBpFromBpNode(const ComputeGraphPtr &graph, const NodePtr &bp_node) const { | |||||
uint32_t last_bp = 0; | |||||
OpDescPtr bp_op_desc = nullptr; | OpDescPtr bp_op_desc = nullptr; | ||||
for (auto &in_anchor : bp_node->GetAllInDataAnchors()) { | for (auto &in_anchor : bp_node->GetAllInDataAnchors()) { | ||||
auto out_anchor = in_anchor->GetPeerOutAnchor(); | auto out_anchor = in_anchor->GetPeerOutAnchor(); | ||||
@@ -691,7 +717,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
} | } | ||||
GE_CHECK_NOTNULL(bp_op_desc); | GE_CHECK_NOTNULL(bp_op_desc); | ||||
current_idx = 0; | |||||
uint32_t current_idx = 0; | |||||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | ||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
@@ -702,8 +728,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
break; | break; | ||||
} | } | ||||
} | } | ||||
profiling_point.bp_index = last_bp; | |||||
return SUCCESS; | |||||
return last_bp; | |||||
} | } | ||||
Status TaskGenerator::FindFpOfEnv(const ComputeGraphPtr &graph, const std::string &fp_point_str, | Status TaskGenerator::FindFpOfEnv(const ComputeGraphPtr &graph, const std::string &fp_point_str, | ||||
@@ -734,7 +759,6 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin | |||||
ProfilingPoint &profiling_point, vector<uint32_t> &all_reduce_nodes) const { | ProfilingPoint &profiling_point, vector<uint32_t> &all_reduce_nodes) const { | ||||
GELOGI("Start FindBpOfEnv"); | GELOGI("Start FindBpOfEnv"); | ||||
uint32_t current_idx = 0; | uint32_t current_idx = 0; | ||||
uint32_t iter_end = 0; | |||||
uint32_t last_bp = 0; | uint32_t last_bp = 0; | ||||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | ||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
@@ -745,10 +769,23 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin | |||||
continue; | continue; | ||||
} | } | ||||
if (op_desc->GetType() == NETOUTPUT) { | |||||
iter_end = current_idx; | |||||
GELOGI("Iter end name %s, idx %u", op_desc->GetName().c_str(), iter_end); | |||||
if (graph->GetNeedIteration()) { | |||||
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") { | |||||
profiling_point.end_index.insert(current_idx); | |||||
GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive", | |||||
op_desc->GetName().c_str(), current_idx); | |||||
} | |||||
if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGN) { | |||||
profiling_point.end_index.insert(current_idx); | |||||
GELOGI("Iter end name %s, idx %u, from FlowCtrl_LoopCond_ASSIGN", op_desc->GetName().c_str(), current_idx); | |||||
} | |||||
} else { | |||||
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { | |||||
profiling_point.end_index.insert(current_idx); | |||||
GELOGI("Iter end name %s, idx %u, from NETOUTPUT", op_desc->GetName().c_str(), current_idx); | |||||
} | |||||
} | } | ||||
if (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE) { | if (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE) { | ||||
all_reduce_nodes.emplace_back(current_idx); | all_reduce_nodes.emplace_back(current_idx); | ||||
GELOGI("Allreduce name %s, idx %u", op_desc->GetName().c_str(), current_idx); | GELOGI("Allreduce name %s, idx %u", op_desc->GetName().c_str(), current_idx); | ||||
@@ -760,7 +797,6 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin | |||||
} | } | ||||
profiling_point.bp_index = last_bp; | profiling_point.bp_index = last_bp; | ||||
profiling_point.end_index = iter_end; | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -857,7 +893,7 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const | |||||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | ||||
ProfilingManager::Instance().ProfilingTrainingTraceOn(); | ProfilingManager::Instance().ProfilingTrainingTraceOn(); | ||||
if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || | if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || | ||||
(profiling_point.end_index == 0)) { | |||||
(profiling_point.end_index.empty())) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
if (profiling_point.fp_index == node_index) { | if (profiling_point.fp_index == node_index) { | ||||
@@ -914,7 +950,7 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P | |||||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | ||||
ProfilingManager::Instance().ProfilingTrainingTraceOn(); | ProfilingManager::Instance().ProfilingTrainingTraceOn(); | ||||
if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || | if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || | ||||
(profiling_point.end_index == 0)) { | |||||
(profiling_point.end_index.empty())) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
if (profiling_point.bp_index == node_index) { | if (profiling_point.bp_index == node_index) { | ||||
@@ -928,7 +964,7 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P | |||||
bp_log_def->set_notify(false); | bp_log_def->set_notify(false); | ||||
task_def_list.emplace_back(bp_task_def); | task_def_list.emplace_back(bp_task_def); | ||||
} | } | ||||
if (profiling_point.end_index == node_index) { | |||||
if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end()) { | |||||
GELOGI("The iteration end operator is %s, idx %u", op_desc->GetName().c_str(), node_index); | GELOGI("The iteration end operator is %s, idx %u", op_desc->GetName().c_str(), node_index); | ||||
TaskDef end_task_def; | TaskDef end_task_def; | ||||
end_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | end_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | ||||
@@ -36,7 +36,7 @@ class OpsKernelManager; | |||||
struct ProfilingPoint { | struct ProfilingPoint { | ||||
uint32_t fp_index = 0; | uint32_t fp_index = 0; | ||||
uint32_t bp_index = 0; | uint32_t bp_index = 0; | ||||
uint32_t end_index = 0; | |||||
std::set<uint32_t> end_index; | |||||
}; | }; | ||||
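With end_index now a std::set<uint32_t> instead of a single index, several nodes (for example the Node_Output_IteratorCtrl_StreamSwitch_StreamActive node and the FlowCtrl loop-assign node in iteration mode) can all be recorded as iteration-end points. A minimal standalone sketch of the new membership and emptiness checks, using hypothetical indices rather than real graph data:

#include <cstdint>
#include <iostream>
#include <set>

// Simplified stand-in for ge::ProfilingPoint, for illustration only.
struct ProfilingPointSketch {
  uint32_t fp_index = 0;
  uint32_t bp_index = 0;
  std::set<uint32_t> end_index;  // multiple iteration-end nodes are allowed
};

int main() {
  ProfilingPointSketch point;
  point.end_index.insert(40);  // e.g. the StreamActive node after NetOutput
  point.end_index.insert(42);  // e.g. the FlowCtrl loop-assign node

  const uint32_t node_index = 42;
  // Membership test replaces the old "end_index == node_index" comparison.
  if (point.end_index.find(node_index) != point.end_index.end()) {
    std::cout << "node " << node_index << " closes the iteration\n";
  }
  // Emptiness test replaces the old "end_index == 0" check.
  if (point.end_index.empty()) {
    std::cout << "profiling points incomplete, skip trace tasks\n";
  }
  return 0;
}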
// Describes infos needed by generate task for fusion node | // Describes infos needed by generate task for fusion node | ||||
struct FusionTaskInfo { | struct FusionTaskInfo { | ||||
@@ -112,6 +112,7 @@ class TaskGenerator { | |||||
Status AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point) const; | Status AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point) const; | ||||
Status AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, | Status AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, | ||||
vector<uint32_t> &all_reduce_nodes) const; | vector<uint32_t> &all_reduce_nodes) const; | ||||
uint32_t FindLastBpFromBpNode(const ComputeGraphPtr &graph, const NodePtr &bp_node) const; | |||||
Status FindFpOfEnv(const ComputeGraphPtr &graph, const std::string &fp_point_str, | Status FindFpOfEnv(const ComputeGraphPtr &graph, const std::string &fp_point_str, | ||||
ProfilingPoint &profiling_point) const; | ProfilingPoint &profiling_point) const; | ||||
@@ -592,7 +592,17 @@ Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigI | |||||
GELOGW("GetAIPPInfo is not success."); | GELOGW("GetAIPPInfo is not success."); | ||||
return ret; | return ret; | ||||
} | } | ||||
return SUCCESS; | |||||
} | |||||
Status GraphExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { | |||||
auto model_manager = ge::ModelManager::GetInstance(); | |||||
GE_CHECK_NOTNULL(model_manager); | |||||
Status ret = model_manager->GetAippType(model_id, index, type, aipp_index); | |||||
if (ret != SUCCESS) { | |||||
GELOGW("Get aipp type is not success."); | |||||
return ret; | |||||
} | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -75,6 +75,8 @@ class GraphExecutor { | |||||
static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | ||||
static Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief Get dynamic batch_info | /// @brief Get dynamic batch_info | ||||
@@ -695,11 +695,7 @@ Status DataDumper::LoadDumpInfo() { | |||||
} | } | ||||
if (dump_properties_.GetDumpMode() == kDumpInput) { | if (dump_properties_.GetDumpMode() == kDumpInput) { | ||||
if (op_iter.is_task) { | if (op_iter.is_task) { | ||||
Status ret = DumpInput(op_iter, task); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Dump input failed"); | |||||
return ret; | |||||
} | |||||
GE_CHK_STATUS_RET(DumpInput(op_iter, task), "Dump input failed"); | |||||
} | } | ||||
op_mapping_info.mutable_task()->Add(std::move(task)); | op_mapping_info.mutable_task()->Add(std::move(task)); | ||||
continue; | continue; | ||||
@@ -726,7 +722,7 @@ Status DataDumper::LoadDumpInfo() { | |||||
SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info); | SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info); | ||||
if (!op_list_.empty() || is_op_debug_) { | |||||
if (!op_list_.empty() || is_op_debug_ || is_end_graph_) { | |||||
auto ret = ExecuteLoadDumpInfo(op_mapping_info); | auto ret = ExecuteLoadDumpInfo(op_mapping_info); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Execute load dump info failed"); | GELOGE(ret, "Execute load dump info failed"); | ||||
@@ -740,7 +736,6 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, | |||||
aicpu::dump::OpMappingInfo &op_mapping_info) { | aicpu::dump::OpMappingInfo &op_mapping_info) { | ||||
if (dump_properties_.GetDumpMode() == kDumpOutput || dump_properties_.GetDumpMode() == kDumpInput || | if (dump_properties_.GetDumpMode() == kDumpOutput || dump_properties_.GetDumpMode() == kDumpInput || | ||||
dump_properties_.GetDumpMode() == kDumpAll) { | dump_properties_.GetDumpMode() == kDumpAll) { | ||||
GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); | |||||
aicpu::dump::Task task; | aicpu::dump::Task task; | ||||
task.set_end_graph(true); | task.set_end_graph(true); | ||||
task.set_task_id(end_graph_task_id_); | task.set_task_id(end_graph_task_id_); | ||||
@@ -748,6 +743,14 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, | |||||
task.mutable_op()->set_op_name(NODE_NAME_END_GRAPH); | task.mutable_op()->set_op_name(NODE_NAME_END_GRAPH); | ||||
task.mutable_op()->set_op_type(ENDGRAPH); | task.mutable_op()->set_op_type(ENDGRAPH); | ||||
op_mapping_info.mutable_task()->Add(std::move(task)); | op_mapping_info.mutable_task()->Add(std::move(task)); | ||||
is_end_graph_ = true; | |||||
if (op_mapping_info.model_name_param_case() == aicpu::dump::OpMappingInfo::kModelName) { | |||||
GELOGI("Add end_graph_info to aicpu, model_name is %s, task_id is %u, stream_id is %u", | |||||
op_mapping_info.model_name().c_str(), end_graph_task_id_, end_graph_stream_id_); | |||||
return; | |||||
} | |||||
GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); | |||||
} | } | ||||
} | } | ||||
@@ -116,6 +116,7 @@ class DataDumper { | |||||
std::vector<InnerDumpInfo> op_list_; | std::vector<InnerDumpInfo> op_list_; | ||||
uint32_t end_graph_task_id_ = 0; | uint32_t end_graph_task_id_ = 0; | ||||
uint32_t end_graph_stream_id_ = 0; | uint32_t end_graph_stream_id_ = 0; | ||||
bool is_end_graph_ = false; | |||||
std::multimap<std::string, InnerInputMapping> input_map_; | std::multimap<std::string, InnerInputMapping> input_map_; | ||||
bool load_flag_; | bool load_flag_; | ||||
uint32_t device_id_; | uint32_t device_id_; | ||||
@@ -125,6 +125,7 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener | |||||
rt_model_stream_(nullptr), | rt_model_stream_(nullptr), | ||||
is_inner_model_stream_(false), | is_inner_model_stream_(false), | ||||
is_async_mode_(false), | is_async_mode_(false), | ||||
last_execute_mode_(INITIALIZATION), | |||||
session_id_(0), | session_id_(0), | ||||
device_id_(0), | device_id_(0), | ||||
maxDumpOpNum_(0), | maxDumpOpNum_(0), | ||||
@@ -1572,6 +1573,48 @@ Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status DavinciModel::GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) { | |||||
GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); | |||||
// Set default value | |||||
type = DATA_WITHOUT_AIPP; | |||||
aipp_index = 0xFFFFFFFF; // default invalid value | |||||
OpDescPtr data_op = data_op_list_[index]; | |||||
GE_CHECK_NOTNULL(data_op); | |||||
if (!data_op->HasAttr(ATTR_DATA_RELATED_AIPP_MODE)) { | |||||
GELOGW("There is no aipp releated info with index %u.", index); | |||||
return SUCCESS; | |||||
} | |||||
std::string data_mode; | |||||
(void)AttrUtils::GetStr(data_op, ATTR_DATA_RELATED_AIPP_MODE, data_mode); | |||||
if (data_mode == "static_aipp") { | |||||
type = DATA_WITH_STATIC_AIPP; | |||||
} else if (data_mode == "dynamic_aipp") { | |||||
type = DATA_WITH_DYNAMIC_AIPP; | |||||
} else if (data_mode == "dynamic_aipp_conf") { | |||||
type = DYNAMIC_AIPP_NODE; | |||||
} else { | |||||
GELOGE(INTERNAL_ERROR, "The info of aipp releated info %s is invalid with index %u.", data_mode.c_str(), index); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
if (type == DATA_WITH_DYNAMIC_AIPP) { | |||||
string related_name;
(void)AttrUtils::GetStr(data_op, ATTR_DATA_AIPP_DATA_NAME_MAP, related_name);
for (size_t i = 0; i < data_op_list_.size(); ++i) {
GE_CHECK_NOTNULL(data_op_list_[i]);
if (data_op_list_[i]->GetName() == related_name) {
GELOGI("Find aipp_data [%s] index %zu from index %u", related_name.c_str(), i, index);
aipp_index = i; | |||||
} | |||||
} | |||||
if (aipp_index == 0xFFFFFFFF) { | |||||
GELOGE(INTERNAL_ERROR, "Can not find aipp data node from index %u", index); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
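GetAippType maps the ATTR_DATA_RELATED_AIPP_MODE string on a Data op to an InputAippType and, for dynamic AIPP, also resolves the index of the related AIPP data node. A small self-contained sketch of just the string-to-type mapping; the enum below is an illustrative stand-in, not the real ge::InputAippType definition:

#include <iostream>
#include <string>

// Illustrative stand-in for ge::InputAippType; the real enum lives in the GE headers.
enum class AippTypeSketch { kWithoutAipp, kStaticAipp, kDynamicAipp, kDynamicAippConf };

// Mirrors the string-to-type mapping used by DavinciModel::GetAippType (sketch only).
bool ParseAippMode(const std::string &data_mode, AippTypeSketch &type) {
  if (data_mode == "static_aipp") {
    type = AippTypeSketch::kStaticAipp;
  } else if (data_mode == "dynamic_aipp") {
    type = AippTypeSketch::kDynamicAipp;      // a related AIPP data node must also be resolved
  } else if (data_mode == "dynamic_aipp_conf") {
    type = AippTypeSketch::kDynamicAippConf;
  } else {
    return false;                             // an unknown mode is treated as an error
  }
  return true;
}

int main() {
  AippTypeSketch type = AippTypeSketch::kWithoutAipp;
  std::cout << std::boolalpha << ParseAippMode("dynamic_aipp", type) << "\n";  // prints true
  return 0;
}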
void DavinciModel::SetDynamicSize(const std::vector<uint64_t> &batch_num, int32_t dynamic_type) { | void DavinciModel::SetDynamicSize(const std::vector<uint64_t> &batch_num, int32_t dynamic_type) { | ||||
batch_size_.clear(); | batch_size_.clear(); | ||||
if (batch_num.empty()) { | if (batch_num.empty()) { | ||||
@@ -1665,9 +1708,9 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, | |||||
return; | return; | ||||
} | } | ||||
// Judge whether this data is linked to a dynamic aipp first; multi-batch has already been considered.
if (op_desc->HasAttr("_dynamic_aipp_input_dims")) { | |||||
if (op_desc->HasAttr(ATTR_DYNAMIC_AIPP_INPUT_DIMS)) { | |||||
vector<int64_t> dynamic_aipp_input_dims; | vector<int64_t> dynamic_aipp_input_dims; | ||||
(void)AttrUtils::GetListInt(op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_input_dims); | |||||
(void)AttrUtils::GetListInt(op_desc, ATTR_DYNAMIC_AIPP_INPUT_DIMS, dynamic_aipp_input_dims); | |||||
SetInputDimsInfo(dynamic_aipp_input_dims, format, input); | SetInputDimsInfo(dynamic_aipp_input_dims, format, input); | ||||
return; | return; | ||||
} else { | } else { | ||||
@@ -1885,13 +1928,7 @@ Status DavinciModel::SinkModelProfile() { | |||||
name = name_; | name = name_; | ||||
} | } | ||||
size_t name_len = name.size(); | size_t name_len = name.size(); | ||||
// phy device id | |||||
uint32_t phy_device_id = 0; | |||||
rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id); | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||||
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); | |||||
return FAILED); | |||||
reporter_data.deviceId = phy_device_id; | |||||
reporter_data.deviceId = device_id_; | |||||
reporter_data.data = (unsigned char *)&name_len; | reporter_data.data = (unsigned char *)&name_len; | ||||
reporter_data.dataLen = sizeof(int32_t); | reporter_data.dataLen = sizeof(int32_t); | ||||
GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", | GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", | ||||
@@ -2060,12 +2097,7 @@ Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { | |||||
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | ||||
return FAILED, "Sink model tag memcpy error."); | return FAILED, "Sink model tag memcpy error."); | ||||
// device id | // device id | ||||
uint32_t phy_device_id = 0; | |||||
rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id); | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||||
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); | |||||
return FAILED); | |||||
reporter_data.deviceId = phy_device_id; | |||||
reporter_data.deviceId = device_id_; | |||||
// Model Header | // Model Header | ||||
string name; | string name; | ||||
@@ -2879,6 +2911,12 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v | |||||
} | } | ||||
} | } | ||||
} | } | ||||
auto it = zero_copy_op_id_batch_label_.find(op_desc->GetId()); | |||||
if (it == zero_copy_op_id_batch_label_.end()) { | |||||
zero_copy_task.SetBatchLabel(kDefaultBatchLable); | |||||
} else { | |||||
zero_copy_task.SetBatchLabel(it->second); | |||||
} | |||||
std::lock_guard<std::mutex> lock(outside_addrs_mutex_); | std::lock_guard<std::mutex> lock(outside_addrs_mutex_); | ||||
if (zero_copy_task.IsTaskArgsSet()) { | if (zero_copy_task.IsTaskArgsSet()) { | ||||
@@ -3045,6 +3083,9 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> & | |||||
data.first, addr, size, buffer_addr); | data.first, addr, size, buffer_addr); | ||||
// For input data, just copy for rts task. | // For input data, just copy for rts task. | ||||
for (ZeroCopyTask &task : zero_copy_tasks_) { | for (ZeroCopyTask &task : zero_copy_tasks_) { | ||||
if (task.GetBatchLabel() != kDefaultBatchLable && task.GetBatchLabel() != batch_label) { | |||||
continue; | |||||
} | |||||
uintptr_t addr_val = reinterpret_cast<uintptr_t>(addr); | uintptr_t addr_val = reinterpret_cast<uintptr_t>(addr); | ||||
if (task.UpdateTaskParam(addr_val, buffer_addr, zero_copy_batch_label_addrs_, batch_label) != SUCCESS) { | if (task.UpdateTaskParam(addr_val, buffer_addr, zero_copy_batch_label_addrs_, batch_label) != SUCCESS) { | ||||
return FAILED; | return FAILED; | ||||
@@ -3361,6 +3402,11 @@ bool DavinciModel::IsBroadCastOpData(const ge::NodePtr &var_node) { | |||||
/// @return Status | /// @return Status | ||||
/// | /// | ||||
Status DavinciModel::InitModelStream(rtStream_t stream) { | Status DavinciModel::InitModelStream(rtStream_t stream) { | ||||
ExecuteMode curr_mode = is_async_mode_ ? ASYNCHRONIZATION : SYNCHRONIZATION; | |||||
GE_CHK_BOOL_RET_STATUS((curr_mode == last_execute_mode_) || (last_execute_mode_ == INITIALIZATION), INTERNAL_ERROR, | |||||
"NnExecute not support mix execute."); | |||||
last_execute_mode_ = curr_mode; | |||||
// Asynchronous mode: use the stream provided by the user.
if (is_async_mode_) { | if (is_async_mode_) { | ||||
rt_model_stream_ = stream; | rt_model_stream_ = stream; | ||||
@@ -3516,7 +3562,7 @@ uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) { | |||||
} | } | ||||
void DavinciModel::FreeFeatureMapMem() { | void DavinciModel::FreeFeatureMapMem() { | ||||
if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { | |||||
if (std::getenv(kEnvGeuseStaticMemory) != nullptr && is_inner_mem_base_) { | |||||
string weight_memory_key = std::to_string(0) + "_f"; | string weight_memory_key = std::to_string(0) + "_f"; | ||||
if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(weight_memory_key) != nullptr) { | if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(weight_memory_key) != nullptr) { | ||||
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weight_memory_key, GetDeviceId()), | GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weight_memory_key, GetDeviceId()), | ||||
@@ -75,6 +75,12 @@ struct timeInfo { | |||||
int64_t dumpEndTime; | int64_t dumpEndTime; | ||||
}; | }; | ||||
enum ExecuteMode { | |||||
INITIALIZATION, | |||||
SYNCHRONIZATION, | |||||
ASYNCHRONIZATION, | |||||
}; | |||||
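The new last_execute_mode_ member lets InitModelStream reject a model that is first executed synchronously and later asynchronously (or the other way around). A compact sketch of that guard, assuming a simplified three-state enum:

#include <iostream>

enum ExecuteModeSketch { INIT, SYNC, ASYNC };

// Mirrors the InitModelStream guard: once a model has executed in one mode,
// switching to the other mode is rejected (sketch only).
bool CheckExecuteMode(ExecuteModeSketch &last_mode, bool is_async) {
  const ExecuteModeSketch curr = is_async ? ASYNC : SYNC;
  if (last_mode != INIT && last_mode != curr) {
    return false;  // mixed synchronous / asynchronous execution is not supported
  }
  last_mode = curr;
  return true;
}

int main() {
  ExecuteModeSketch mode = INIT;
  std::cout << std::boolalpha << CheckExecuteMode(mode, true) << "\n";   // true: first run is async
  std::cout << CheckExecuteMode(mode, false) << "\n";                    // false: switching to sync is rejected
  return 0;
}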
// comments | // comments | ||||
class DavinciModel { | class DavinciModel { | ||||
public: | public: | ||||
@@ -314,6 +320,8 @@ class DavinciModel { | |||||
/// | /// | ||||
Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info); | Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info); | ||||
Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief Get model_id. | /// @brief Get model_id. | ||||
@@ -884,6 +892,7 @@ class DavinciModel { | |||||
bool is_inner_model_stream_; | bool is_inner_model_stream_; | ||||
bool is_async_mode_; // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_. | bool is_async_mode_; // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_. | ||||
ExecuteMode last_execute_mode_; | |||||
bool is_stream_list_bind_{false}; | bool is_stream_list_bind_{false}; | ||||
bool is_pure_head_stream_{false}; | bool is_pure_head_stream_{false}; | ||||
@@ -43,6 +43,13 @@ const std::string kCmdTypeProfInit = "prof_init"; | |||||
const std::string kCmdTypeProfFinalize = "prof_finalize"; | const std::string kCmdTypeProfFinalize = "prof_finalize"; | ||||
const std::string kCmdTypeProfStart = "prof_start"; | const std::string kCmdTypeProfStart = "prof_start"; | ||||
const std::string kCmdTypeProfStop = "prof_stop"; | const std::string kCmdTypeProfStop = "prof_stop"; | ||||
const char *const kLoadOpFromBuf = "loadOpFromBuf"; | |||||
struct CustAicpuSoBuf { | |||||
uint64_t kernelSoBuf; | |||||
uint32_t kernelSoBufLen; | |||||
uint64_t kernelSoName; | |||||
uint32_t kernelSoNameLen; | |||||
} __attribute__((packed)); | |||||
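CustAicpuSoBuf is the packed argument block handed to the loadOpFromBuf CPU kernel: two device addresses plus their lengths, with no padding between fields. A hedged host-side sketch of how such a blob could be assembled before it is copied to device memory; the device addresses below are hypothetical placeholders, not values produced by rtMalloc:

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

// Local copy of the packed argument layout, for illustration only.
struct CustAicpuSoBufSketch {
  uint64_t kernelSoBuf;
  uint32_t kernelSoBufLen;
  uint64_t kernelSoName;
  uint32_t kernelSoNameLen;
} __attribute__((packed));

// 8 + 4 + 8 + 4 bytes, no padding.
static_assert(sizeof(CustAicpuSoBufSketch) == 24, "args layout must stay packed");

// Builds the host-side args blob; dev_so / dev_name stand for device addresses
// that the real code obtains from rtMalloc.
std::vector<uint8_t> BuildArgs(uint64_t dev_so, uint32_t so_len, uint64_t dev_name, uint32_t name_len) {
  CustAicpuSoBufSketch buf{dev_so, so_len, dev_name, name_len};
  std::vector<uint8_t> args(sizeof(buf));
  std::memcpy(args.data(), &buf, sizeof(buf));
  return args;  // in the real flow this is copied to device memory and passed to rtCpuKernelLaunch
}

int main() {
  const std::vector<uint8_t> args = BuildArgs(0x1000, 2048, 0x2000, 16);  // hypothetical addresses
  std::cout << args.size() << "\n";  // prints 24
  return 0;
}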
} // namespace | } // namespace | ||||
DumpProperties ModelManager::dump_properties_; | DumpProperties ModelManager::dump_properties_; | ||||
@@ -163,7 +170,13 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { | |||||
GELOGI("The session: %lu not created.", session_id); | GELOGI("The session: %lu not created.", session_id); | ||||
return; | return; | ||||
} else { | } else { | ||||
GE_CHK_RT(rtSetDevice(static_cast<int32_t>(GetContext().DeviceId()))); | |||||
rtContext_t ctx = nullptr; | |||||
bool has_ctx = (rtCtxGetCurrent(&ctx) == RT_ERROR_NONE); | |||||
if (!has_ctx) { | |||||
GELOGI("Set device %u.", GetContext().DeviceId()); | |||||
GE_CHK_RT(rtSetDevice(static_cast<int32_t>(GetContext().DeviceId()))); | |||||
} | |||||
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_DESTROY, session_id, 0); | Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_DESTROY, session_id, 0); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGW("The session: %lu destroy failed.", session_id); | GELOGW("The session: %lu destroy failed.", session_id); | ||||
@@ -171,7 +184,11 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { | |||||
(void)sess_ids_.erase(session_id); | (void)sess_ids_.erase(session_id); | ||||
GELOGI("The session: %lu destroyed.", session_id); | GELOGI("The session: %lu destroyed.", session_id); | ||||
} | } | ||||
GE_CHK_RT(rtDeviceReset(static_cast<int32_t>(GetContext().DeviceId()))); | |||||
if (!has_ctx) { | |||||
GELOGI("Reset device %u.", GetContext().DeviceId()); | |||||
GE_CHK_RT(rtDeviceReset(static_cast<int32_t>(GetContext().DeviceId()))); | |||||
} | |||||
} | } | ||||
} | } | ||||
@@ -382,6 +399,7 @@ Status ModelManager::Unload(uint32_t model_id) { | |||||
} | } | ||||
std::lock_guard<std::mutex> lock(exeception_infos_mutex_); | std::lock_guard<std::mutex> lock(exeception_infos_mutex_); | ||||
exception_infos_.clear(); | exception_infos_.clear(); | ||||
cust_aicpu_so_.clear(); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -858,6 +876,14 @@ Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippCo | |||||
return davinci_model->GetAIPPInfo(index, aipp_info); | return davinci_model->GetAIPPInfo(index, aipp_info); | ||||
} | } | ||||
Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { | |||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetAippType failed, invalid model_id is %u.",
model_id); | |||||
return davinci_model->GetAippType(index, type, aipp_index); | |||||
} | |||||
Status ModelManager::GenSessionId(uint64_t &session_id) { | Status ModelManager::GenSessionId(uint64_t &session_id) { | ||||
std::lock_guard<std::mutex> lock(session_id_create_mutex_); | std::lock_guard<std::mutex> lock(session_id_create_mutex_); | ||||
@@ -919,7 +945,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||||
} | } | ||||
davinci_model->SetDeviceId(device_id); | davinci_model->SetDeviceId(device_id); | ||||
davinci_model->SetOmName(model.om_name); | davinci_model->SetOmName(model.om_name); | ||||
if (DumpManager::GetInstance().IsDumpOpen()) { | |||||
if (DumpManager::GetInstance().GetDumpProperties().IsDumpOpen()) { | |||||
davinci_model->SetDumpProperties(DumpManager::GetInstance().GetDumpProperties()); | davinci_model->SetDumpProperties(DumpManager::GetInstance().GetDumpProperties()); | ||||
} else { | } else { | ||||
davinci_model->SetDumpProperties(dump_properties_); | davinci_model->SetDumpProperties(dump_properties_); | ||||
@@ -1070,6 +1096,67 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status ModelManager::LoadCustAicpuSo(const OpDescPtr op_desc, string so_name) { | |||||
std::lock_guard<std::mutex> lock(cust_aicpu_mutex_); | |||||
auto it = cust_aicpu_so_.find(so_name); | |||||
if (it == cust_aicpu_so_.end()) { | |||||
GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s", | |||||
op_desc->GetName().c_str(), so_name.c_str()); | |||||
(void)cust_aicpu_so_.insert(so_name); | |||||
GELOGI("LaunchCustAicpuSo op name %s, so_name %s.", op_desc->GetName().c_str(), so_name.c_str()); | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status ModelManager::LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name) { | |||||
CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); | |||||
if (aicpu_kernel == nullptr) { | |||||
GELOGE(INTERNAL_ERROR, "cust aicpu op %s can't find kernel!", op_desc->GetName().c_str()); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
const void *aicpu_data = aicpu_kernel->GetBinData(); | |||||
uint32_t aicpu_data_length = aicpu_kernel->GetBinDataSize(); | |||||
void *d_aicpu_data = nullptr; | |||||
void *d_so_name = nullptr; | |||||
void *args = nullptr; | |||||
rtError_t status; | |||||
rtStream_t stream = nullptr; | |||||
GE_CHK_RT(rtMalloc(&d_aicpu_data, aicpu_data_length, RT_MEMORY_HBM)); | |||||
GE_CHK_RT(rtMemcpy(d_aicpu_data, aicpu_data_length, aicpu_data, aicpu_data_length, RT_MEMCPY_HOST_TO_DEVICE)); | |||||
GE_CHK_RT(rtMalloc(&d_so_name, so_name.size(), RT_MEMORY_HBM)); | |||||
GE_CHK_RT(rtMemcpy(d_so_name, so_name.size(), reinterpret_cast<const void *>(so_name.c_str()), so_name.size(), | |||||
RT_MEMCPY_HOST_TO_DEVICE)); | |||||
CustAicpuSoBuf cust_aicpu_so_buf; | |||||
cust_aicpu_so_buf.kernelSoBuf = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_aicpu_data)); | |||||
cust_aicpu_so_buf.kernelSoBufLen = aicpu_data_length; | |||||
cust_aicpu_so_buf.kernelSoName = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_so_name)); | |||||
cust_aicpu_so_buf.kernelSoNameLen = so_name.size(); | |||||
uint32_t args_size = sizeof(CustAicpuSoBuf); | |||||
GE_CHK_RT(rtMalloc(&args, args_size, RT_MEMORY_HBM)); | |||||
GE_CHK_RT(rtMemcpy(args, args_size, static_cast<void *>(&cust_aicpu_so_buf), args_size, RT_MEMCPY_HOST_TO_DEVICE)); | |||||
GE_CHK_RT(rtStreamCreate(&stream, 0)); | |||||
GE_CHK_RT(rtCpuKernelLaunch(nullptr, kLoadOpFromBuf, 1, args, args_size, nullptr, stream)); | |||||
status = rtStreamSynchronize(stream); | |||||
if (status != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); | |||||
GE_CHK_RT(rtStreamDestroy(stream)); | |||||
GE_CHK_RT(rtFree(args)); | |||||
GE_CHK_RT(rtFree(d_aicpu_data)); | |||||
GE_CHK_RT(rtFree(d_so_name)); | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | |||||
GE_CHK_RT(rtStreamDestroy(stream)); | |||||
GE_CHK_RT(rtFree(args)); | |||||
GE_CHK_RT(rtFree(d_aicpu_data)); | |||||
GE_CHK_RT(rtFree(d_so_name)); | |||||
GELOGI("Cpu kernel launch loadOpFromBuf task success."); | |||||
return SUCCESS; | |||||
} | |||||
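LaunchCustAicpuSo releases the stream and the three device buffers on both the error path and the success path, which duplicates the rtFree/rtStreamDestroy calls. One common alternative (shown only as a sketch, not as the pattern GE actually uses here) is a scope guard that runs the cleanup on every exit path:

#include <cstdio>
#include <functional>
#include <utility>

// Generic scope guard, shown only to illustrate the idea.
class ScopeGuard {
 public:
  explicit ScopeGuard(std::function<void()> fn) : fn_(std::move(fn)) {}
  ~ScopeGuard() {
    if (fn_) {
      fn_();
    }
  }
  ScopeGuard(const ScopeGuard &) = delete;
  ScopeGuard &operator=(const ScopeGuard &) = delete;

 private:
  std::function<void()> fn_;
};

int main() {
  // Stand-in for rtFree(d_aicpu_data) / rtStreamDestroy(stream); runs on every exit path.
  ScopeGuard free_buffers([] { std::puts("buffers released"); });
  return 0;
}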
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief get model memory size and weight | /// @brief get model memory size and weight | ||||
@@ -224,6 +224,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
/// | /// | ||||
ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | ||||
ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); | |||||
/// | /// | ||||
/// @ingroup domi_ome | /// @ingroup domi_ome | ||||
/// @brief set model input and output size zero copy | /// @brief set model input and output size zero copy | ||||
@@ -268,6 +270,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); | ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); | ||||
ge::Status LoadCustAicpuSo(const OpDescPtr op_desc, string so_name); | |||||
ge::Status LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name); | |||||
ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | ||||
ge::Status GenSessionId(uint64_t &session_id); | ge::Status GenSessionId(uint64_t &session_id); | ||||
@@ -333,6 +339,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
uint64_t session_id_bias_; | uint64_t session_id_bias_; | ||||
std::set<uint64_t> sess_ids_; | std::set<uint64_t> sess_ids_; | ||||
std::vector<rtExceptionInfo> exception_infos_; | std::vector<rtExceptionInfo> exception_infos_; | ||||
std::mutex cust_aicpu_mutex_; | |||||
std::set<std::string> cust_aicpu_so_; | |||||
static DumpProperties dump_properties_; | static DumpProperties dump_properties_; | ||||
}; | }; | ||||
@@ -29,6 +29,14 @@ | |||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
#include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
#define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ | |||||
do { \ | |||||
if (SIZE <= static_cast<uint64_t>(OFFSET)) { \ | |||||
GELOGE(OUT_OF_MEMORY, "Node: %s, memory out of range[%lu: %ld]", OP->GetName().c_str(), SIZE, OFFSET); \ | |||||
return {}; \ | |||||
} \ | |||||
} while (0) | |||||
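The do { ... } while (0) wrapper makes VALIDATE_MEM_RANGE behave like a single statement after if/else, and the early return {} stops the address helpers (GetInputDataAddrs, GetOutputDataAddrs, GetWorkspaceDataAddrs) from computing base + offset with an out-of-range offset. A standalone analogue of the same check; the names and the nullptr return are illustrative, not the GE macro itself:

#include <cstdint>
#include <cstdio>
#include <vector>

// Reject an offset before it is used in base + offset pointer arithmetic.
#define VALIDATE_RANGE_SKETCH(NAME, SIZE, OFFSET)                                   \
  do {                                                                              \
    if ((SIZE) <= static_cast<uint64_t>(OFFSET)) {                                  \
      std::fprintf(stderr, "%s: offset %lld out of range %llu\n", (NAME),           \
                   static_cast<long long>(OFFSET),                                  \
                   static_cast<unsigned long long>(SIZE));                          \
      return nullptr;                                                               \
    }                                                                               \
  } while (0)

uint8_t *AddrAt(std::vector<uint8_t> &mem, int64_t offset) {
  VALIDATE_RANGE_SKETCH("demo", mem.size(), offset);
  return mem.data() + offset;  // reached only when the offset is in range
}

int main() {
  std::vector<uint8_t> mem(64);
  return (AddrAt(mem, 128) == nullptr) ? 0 : 1;  // the out-of-range offset is rejected
}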
namespace ge { | namespace ge { | ||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -38,7 +46,7 @@ namespace ge { | |||||
vector<int64_t> ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { | vector<int64_t> ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { | ||||
vector<int64_t> v_input_size; | vector<int64_t> v_input_size; | ||||
GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_size); | GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_size); | ||||
const size_t inputs_size = op_desc->GetInputsSize(); | |||||
const size_t inputs_size = op_desc->GetAllInputsSize(); | |||||
const string op_type = op_desc->GetType(); | const string op_type = op_desc->GetType(); | ||||
const vector<bool> v_is_input_const = op_desc->GetIsInputConst(); | const vector<bool> v_is_input_const = op_desc->GetIsInputConst(); | ||||
@@ -151,7 +159,7 @@ vector<int64_t> ModelUtils::GetWeightSize(ConstOpDescPtr op_desc) { | |||||
} | } | ||||
// other ops get weight from connected constop | // other ops get weight from connected constop | ||||
const size_t inputs_size = op_desc->GetInputsSize(); | |||||
const size_t inputs_size = op_desc->GetAllInputsSize(); | |||||
const vector<bool> v_is_input_const = op_desc->GetIsInputConst(); | const vector<bool> v_is_input_const = op_desc->GetIsInputConst(); | ||||
for (size_t i = 0; i < inputs_size; ++i) { | for (size_t i = 0; i < inputs_size; ++i) { | ||||
if ((i < v_is_input_const.size()) && v_is_input_const[i]) { | if ((i < v_is_input_const.size()) && v_is_input_const[i]) { | ||||
@@ -191,7 +199,7 @@ vector<ConstGeTensorPtr> ModelUtils::GetWeights(ConstOpDescPtr op_desc) { | |||||
} | } | ||||
// other ops get weight from connected constop | // other ops get weight from connected constop | ||||
const size_t inputs_size = op_desc->GetInputsSize(); | |||||
const size_t inputs_size = op_desc->GetAllInputsSize(); | |||||
const vector<bool> v_is_input_const = op_desc->GetIsInputConst(); | const vector<bool> v_is_input_const = op_desc->GetIsInputConst(); | ||||
for (size_t i = 0; i < inputs_size; ++i) { | for (size_t i = 0; i < inputs_size; ++i) { | ||||
if ((i < v_is_input_const.size()) && v_is_input_const[i]) { | if ((i < v_is_input_const.size()) && v_is_input_const[i]) { | ||||
@@ -221,7 +229,7 @@ vector<::tagCcAICPUTensor> ModelUtils::GetInputDescs(ConstOpDescPtr op_desc) { | |||||
vector<::opTensor_t> v_input_descs; | vector<::opTensor_t> v_input_descs; | ||||
GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_descs); | GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_descs); | ||||
const size_t inputs_size = op_desc->GetInputsSize(); | |||||
const size_t inputs_size = op_desc->GetAllInputsSize(); | |||||
const vector<bool> v_is_input_const = op_desc->GetIsInputConst(); | const vector<bool> v_is_input_const = op_desc->GetIsInputConst(); | ||||
for (size_t i = 0; i < inputs_size; ++i) { | for (size_t i = 0; i < inputs_size; ++i) { | ||||
@@ -306,7 +314,7 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||||
GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_data_addr); | GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_data_addr); | ||||
uint64_t session_id = model_param.session_id; | uint64_t session_id = model_param.session_id; | ||||
const size_t inputs_size = op_desc->GetInputsSize(); | |||||
const size_t inputs_size = op_desc->GetAllInputsSize(); | |||||
const vector<int64_t> v_input_offset = op_desc->GetInputOffset(); | const vector<int64_t> v_input_offset = op_desc->GetInputOffset(); | ||||
const string op_type = op_desc->GetType(); | const string op_type = op_desc->GetType(); | ||||
@@ -320,20 +328,20 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||||
op_desc->GetName().c_str(), v_memory_type.size(), inputs_size); | op_desc->GetName().c_str(), v_memory_type.size(), inputs_size); | ||||
return v_input_data_addr; | return v_input_data_addr; | ||||
} | } | ||||
for (size_t i = 0; i < inputs_size; ++i) { | |||||
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||||
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i)); | |||||
if (tensor_desc == nullptr) { | |||||
GELOGD("Op: %s, Index: %zu, has no input", op_desc->GetName().c_str(), i); | |||||
continue; | |||||
} | |||||
if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) { | if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) { | ||||
// TBE: add weights address to input | // TBE: add weights address to input | ||||
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i); | |||||
if (tensor_desc == nullptr) { | |||||
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); | |||||
continue; | |||||
} | |||||
int64_t tensor_size = 0; | int64_t tensor_size = 0; | ||||
GE_CHK_STATUS(TensorUtils::GetSize(*tensor_desc, tensor_size)); | GE_CHK_STATUS(TensorUtils::GetSize(*tensor_desc, tensor_size)); | ||||
if (tensor_size) { | if (tensor_size) { | ||||
int64_t data_offset = 0; | int64_t data_offset = 0; | ||||
GE_CHK_STATUS(TensorUtils::GetDataOffset(*tensor_desc, data_offset)); | GE_CHK_STATUS(TensorUtils::GetDataOffset(*tensor_desc, data_offset)); | ||||
VALIDATE_MEM_RANGE(op_desc, model_param.weight_size, data_offset); | |||||
uint8_t *weight_addr = model_param.weight_base + data_offset; | uint8_t *weight_addr = model_param.weight_base + data_offset; | ||||
v_input_data_addr.push_back(weight_addr); | v_input_data_addr.push_back(weight_addr); | ||||
GELOGI("[IMAS]GetInputDataAddrs graph_%u type[C] name[%s] input[%zu] memaddr[%p]", model_param.graph_id, | GELOGI("[IMAS]GetInputDataAddrs graph_%u type[C] name[%s] input[%zu] memaddr[%p]", model_param.graph_id, | ||||
@@ -345,11 +353,12 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||||
GE_IF_BOOL_EXEC(non_const_index >= v_input_offset.size(), | GE_IF_BOOL_EXEC(non_const_index >= v_input_offset.size(), | ||||
GELOGW("offsets=%zu, inputs=%zu, index=%zu.", v_input_offset.size(), inputs_size, non_const_index); | GELOGW("offsets=%zu, inputs=%zu, index=%zu.", v_input_offset.size(), inputs_size, non_const_index); | ||||
break;); | |||||
break); | |||||
int64_t input_offset = v_input_offset[non_const_index]; | int64_t input_offset = v_input_offset[non_const_index]; | ||||
non_const_index++; | non_const_index++; | ||||
GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset), | GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset), | ||||
VALIDATE_MEM_RANGE(op_desc, model_param.var_size, input_offset - model_param.logic_var_base); | |||||
uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base; | uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base; | ||||
v_input_data_addr.push_back(variable_addr); | v_input_data_addr.push_back(variable_addr); | ||||
GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", | GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", | ||||
@@ -363,6 +372,7 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||||
mem_addr = reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(input_offset)); | mem_addr = reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(input_offset)); | ||||
v_input_data_addr.push_back(mem_addr); | v_input_data_addr.push_back(mem_addr); | ||||
} else { | } else { | ||||
VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, input_offset); | |||||
mem_addr = model_param.mem_base + input_offset; | mem_addr = model_param.mem_base + input_offset; | ||||
v_input_data_addr.push_back(mem_addr); | v_input_data_addr.push_back(mem_addr); | ||||
} | } | ||||
@@ -398,6 +408,7 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C | |||||
} | } | ||||
for (size_t i = 0; i < outputs_size; ++i) { | for (size_t i = 0; i < outputs_size; ++i) { | ||||
GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), | GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), | ||||
VALIDATE_MEM_RANGE(op_desc, model_param.var_size, v_output_offset[i] - model_param.logic_var_base); | |||||
uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base; | uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base; | ||||
v_output_data_addr.push_back(variable_addr); | v_output_data_addr.push_back(variable_addr); | ||||
GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", | GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", | ||||
@@ -410,6 +421,7 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C | |||||
mem_addr = reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_output_offset[i])); | mem_addr = reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_output_offset[i])); | ||||
v_output_data_addr.push_back(mem_addr); | v_output_data_addr.push_back(mem_addr); | ||||
} else { | } else { | ||||
VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_output_offset[i]); | |||||
mem_addr = static_cast<uint8_t *>(model_param.mem_base + v_output_offset[i]); | mem_addr = static_cast<uint8_t *>(model_param.mem_base + v_output_offset[i]); | ||||
v_output_data_addr.push_back(mem_addr); | v_output_data_addr.push_back(mem_addr); | ||||
} | } | ||||
@@ -440,15 +452,19 @@ vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param | |||||
for (size_t i = 0; i < v_workspace_bytes.size(); ++i) { | for (size_t i = 0; i < v_workspace_bytes.size(); ++i) { | ||||
if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { | if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { | ||||
v_workspace_data_addr.push_back(reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_workspace_offset[i]))); | v_workspace_data_addr.push_back(reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_workspace_offset[i]))); | ||||
GELOGI("Fusion: op: %s, GetWorkspaceDataAddrs mem_addr[workspace index %zu]:%p", op_desc->GetName().c_str(), i, | |||||
reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_workspace_offset[i]))); | |||||
GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[L1] name[%s], mem_addr[workspace index %zu]:0x%lx", | |||||
model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i]); | |||||
} else if (v_workspace_bytes[i] == 0) { | |||||
v_workspace_data_addr.push_back(nullptr); | |||||
GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] Null addr", | |||||
model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i]); | |||||
} else { | } else { | ||||
int64_t workspace_offset = v_workspace_offset[i]; | |||||
int64_t workspace_bytes = v_workspace_bytes[i]; | |||||
uint8_t *mem_addr = workspace_bytes == 0 ? nullptr : model_param.mem_base + workspace_offset; | |||||
VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_workspace_offset[i]); | |||||
uint8_t *mem_addr = model_param.mem_base + v_workspace_offset[i]; | |||||
v_workspace_data_addr.push_back(mem_addr); | v_workspace_data_addr.push_back(mem_addr); | ||||
GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] memaddr[%p]", | GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] memaddr[%p]", | ||||
model_param.graph_id, op_desc->GetName().c_str(), i, workspace_offset, workspace_bytes, mem_addr); | |||||
model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i], | |||||
mem_addr); | |||||
} | } | ||||
} | } | ||||
@@ -26,6 +26,7 @@ | |||||
#include "framework/common/l2_cache_optimize.h" | #include "framework/common/l2_cache_optimize.h" | ||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
#include "graph/load/new_model_manager/davinci_model.h" | #include "graph/load/new_model_manager/davinci_model.h" | ||||
#include "graph/load/new_model_manager/model_manager.h" | |||||
#include "graph/load/new_model_manager/model_utils.h" | #include "graph/load/new_model_manager/model_utils.h" | ||||
#include "runtime/kernel.h" | #include "runtime/kernel.h" | ||||
#include "super_kernel/super_kernel.h" | #include "super_kernel/super_kernel.h" | ||||
@@ -41,13 +42,6 @@ const char *kIsLastNode = "is_last_node"; | |||||
const char *kIsFirstNode = "is_first_node"; | const char *kIsFirstNode = "is_first_node"; | ||||
const int64_t kCloseSkt = 100; | const int64_t kCloseSkt = 100; | ||||
const uint32_t kAddrLen = sizeof(void *); | const uint32_t kAddrLen = sizeof(void *); | ||||
const char *const kLoadOpFromBuf = "loadOpFromBuf"; | |||||
struct CustAicpuSoBuf { | |||||
uint64_t kernelSoBuf; | |||||
uint32_t kernelSoBufLen; | |||||
uint64_t kernelSoName; | |||||
uint32_t kernelSoNameLen; | |||||
} __attribute__((packed)); | |||||
} // namespace | } // namespace | ||||
namespace ge { | namespace ge { | ||||
@@ -861,92 +855,6 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status KernelTaskInfo::LaunchCustAicpuSo(const OpDescPtr op_desc, const domi::KernelDef &kernel_def) { | |||||
CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); | |||||
if (aicpu_kernel == nullptr) { | |||||
GELOGE(INTERNAL_ERROR, "cust aicpu op %s can't find kernel!", op_desc->GetName().c_str()); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
const void *aicpu_data = aicpu_kernel->GetBinData(); | |||||
uint32_t aicpu_data_length = aicpu_kernel->GetBinDataSize(); | |||||
void *d_aicpu_data = nullptr; | |||||
rtError_t status = rtMalloc(&d_aicpu_data, aicpu_data_length, RT_MEMORY_HBM); | |||||
if (status != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | |||||
status = rtMemcpy(d_aicpu_data, aicpu_data_length, aicpu_data, aicpu_data_length, RT_MEMCPY_HOST_TO_DEVICE); | |||||
if (status != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | |||||
void *d_so_name = nullptr; | |||||
status = rtMalloc(&d_so_name, so_name_.size(), RT_MEMORY_HBM); | |||||
if (status != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | |||||
status = rtMemcpy(d_so_name, so_name_.size(), reinterpret_cast<const void *>(so_name_.c_str()), so_name_.size(), | |||||
RT_MEMCPY_HOST_TO_DEVICE); | |||||
if (status != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | |||||
CustAicpuSoBuf cust_aicpu_so_buf; | |||||
cust_aicpu_so_buf.kernelSoBuf = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_aicpu_data)); | |||||
cust_aicpu_so_buf.kernelSoBufLen = aicpu_data_length; | |||||
cust_aicpu_so_buf.kernelSoName = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_so_name)); | |||||
cust_aicpu_so_buf.kernelSoNameLen = so_name_.size(); | |||||
void *args = nullptr; | |||||
uint32_t args_size = sizeof(CustAicpuSoBuf); | |||||
status = rtMalloc(&args, args_size, RT_MEMORY_HBM); | |||||
if (status != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | |||||
GELOGI("loadOpFromBuf kernelSoBuf %p, kernelSoBufLen %u, kernelSoName %p, kernelSoNameLen %u.", d_aicpu_data, | |||||
aicpu_data_length, d_so_name, so_name_.size()); | |||||
status = rtMemcpy(args, args_size, static_cast<void *>(&cust_aicpu_so_buf), args_size, RT_MEMCPY_HOST_TO_DEVICE); | |||||
if (status != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | |||||
rtStream_t stream = nullptr; | |||||
status = rtStreamCreate(&stream, 0); | |||||
if (status != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt create stream failed, status: 0x%x", status); | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | |||||
status = rtCpuKernelLaunch(nullptr, kLoadOpFromBuf, 1, args, args_size, nullptr, stream); | |||||
if (status != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch loadOpFromBuf failed, status: 0x%X", status); | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | |||||
GELOGI("Cpu kernel launch loadOpFromBuf."); | |||||
status = rtStreamSynchronize(stream); | |||||
if (status != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | |||||
GE_CHK_RT(rtFree(args)); | |||||
GE_CHK_RT(rtFree(d_aicpu_data)); | |||||
GE_CHK_RT(rtFree(d_so_name)); | |||||
GELOGI("Cpu kernel launch loadOpFromBuf task success."); | |||||
return SUCCESS; | |||||
} | |||||
Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &kernel_def) { | Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &kernel_def) { | ||||
GELOGI("Do InitAicpuTask"); | GELOGI("Do InitAicpuTask"); | ||||
so_name_ = kernel_def.so_name(); | so_name_ = kernel_def.so_name(); | ||||
@@ -961,7 +869,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
} | } | ||||
if (kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { | if (kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { | ||||
GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, kernel_def), "launch cust aicpu so failed"); | |||||
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_), "launch cust aicpu so failed"); | |||||
} | } | ||||
// copy args to new host memory | // copy args to new host memory | ||||
@@ -106,8 +106,6 @@ class KernelTaskInfo : public TaskInfo { | |||||
Status InitAicpuTaskExtInfo(const std::string &ext_info); | Status InitAicpuTaskExtInfo(const std::string &ext_info); | ||||
Status LaunchCustAicpuSo(const OpDescPtr op_desc, const domi::KernelDef &kernel_def); | |||||
Status StoreInputOutputTensor(const std::vector<void *> &input_data_addrs, | Status StoreInputOutputTensor(const std::vector<void *> &input_data_addrs, | ||||
const std::vector<void *> &output_data_addrs, | const std::vector<void *> &output_data_addrs, | ||||
const std::vector<::tagCcAICPUTensor> &input_descs, | const std::vector<::tagCcAICPUTensor> &input_descs, | ||||
@@ -118,13 +118,11 @@ bool ZeroCopyTask::CheckDynamicBatch(const map<string, set<uintptr_t>> &batch_ad | |||||
*/ | */ | ||||
Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map<string, set<uintptr_t>> &batch_addrs, | Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map<string, set<uintptr_t>> &batch_addrs, | ||||
const string &batch_label) { | const string &batch_label) { | ||||
for (auto pair : task_addr_offset_) { | |||||
if (pair.first != addr) { | |||||
continue; | |||||
} | |||||
auto iter = task_addr_offset_.find(addr); | |||||
if (iter != task_addr_offset_.end()) { | |||||
auto &cur_pair = *iter; | |||||
uint8_t *args_info = args_info_.data(); | uint8_t *args_info = args_info_.data(); | ||||
for (auto offset : pair.second) { | |||||
for (auto offset : cur_pair.second) { | |||||
if (!CheckDynamicBatch(batch_addrs, batch_label, reinterpret_cast<uintptr_t>(args_addr_ + offset))) { | if (!CheckDynamicBatch(batch_addrs, batch_label, reinterpret_cast<uintptr_t>(args_addr_ + offset))) { | ||||
continue; | continue; | ||||
} | } | ||||
@@ -83,6 +83,10 @@ class ZeroCopyTask { | |||||
*/ | */ | ||||
ge::Status DistributeParam(bool async_mode, rtStream_t stream); | ge::Status DistributeParam(bool async_mode, rtStream_t stream); | ||||
void SetBatchLabel(const string &batch_label) { batch_label_ = batch_label; } | |||||
const string &GetBatchLabel() const { return batch_label_; } | |||||
protected: | protected: | ||||
bool CheckDynamicBatch(const map<string, set<uintptr_t>> &batch_addrs, const string &batch_label, uintptr_t addr); | bool CheckDynamicBatch(const map<string, set<uintptr_t>> &batch_addrs, const string &batch_label, uintptr_t addr); | ||||
@@ -93,7 +97,7 @@ class ZeroCopyTask { | |||||
const size_t args_size_; | const size_t args_size_; | ||||
vector<uint8_t> args_info_; | vector<uint8_t> args_info_; | ||||
bool is_updated_; | bool is_updated_; | ||||
string batch_label_; | |||||
// <address from Op, {offset in args}> | // <address from Op, {offset in args}> | ||||
map<uintptr_t, vector<size_t>> task_addr_offset_; | map<uintptr_t, vector<size_t>> task_addr_offset_; | ||||
}; | }; | ||||
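SetBatchLabel/GetBatchLabel let UpdateIoTaskArgs skip zero-copy tasks that belong to a different dynamic-batch branch: only tasks carrying the default label or the label of the batch currently being executed get their args refreshed. A minimal sketch of that filter, using made-up labels:

#include <iostream>
#include <string>
#include <vector>

// Simplified stand-in for ZeroCopyTask: only the batch-label part is modelled.
struct TaskSketch {
  std::string batch_label;  // the default label marks a task that applies to every batch
};

// Mirrors the filter added in DavinciModel::UpdateIoTaskArgs (illustrative only).
int CountTasksToUpdate(const std::vector<TaskSketch> &tasks, const std::string &default_label,
                       const std::string &batch_label) {
  int updated = 0;
  for (const auto &task : tasks) {
    if (task.batch_label != default_label && task.batch_label != batch_label) {
      continue;  // task belongs to another batch branch, leave its args untouched
    }
    ++updated;
  }
  return updated;
}

int main() {
  std::vector<TaskSketch> tasks = {{"Batch_default"}, {"batch_0"}, {"batch_1"}};
  std::cout << CountTasksToUpdate(tasks, "Batch_default", "batch_1") << "\n";  // prints 2
  return 0;
}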
@@ -267,6 +267,14 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||||
auto compute_graph = GraphUtils::GetComputeGraph(graph); | auto compute_graph = GraphUtils::GetComputeGraph(graph); | ||||
if (compute_graph != nullptr) { | if (compute_graph != nullptr) { | ||||
compute_graph->SetGraphID(graph_id); | compute_graph->SetGraphID(graph_id); | ||||
bool graph_has_been_added = false; | |||||
if (AttrUtils::GetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, graph_has_been_added) && | |||||
graph_has_been_added) { | |||||
GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] the same graph object cannot be added again, graph_id = %u.",
graph_id); | |||||
return GE_GRAPH_GRAPH_ALREADY_EXIST; | |||||
} | |||||
(void)AttrUtils::SetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, true); | |||||
} else { | } else { | ||||
GELOGE(FAILED, "compute graph is null"); | GELOGE(FAILED, "compute graph is null"); | ||||
return FAILED; | return FAILED; | ||||
@@ -1953,9 +1961,9 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||||
names_to_passes.emplace_back("MergePass", &merge_pass); | names_to_passes.emplace_back("MergePass", &merge_pass); | ||||
names_to_passes.emplace_back("CastRemovePass", &cast_remove_pass); | names_to_passes.emplace_back("CastRemovePass", &cast_remove_pass); | ||||
names_to_passes.emplace_back("TransposeTransDataPass", &transpose_transdata_pass); | names_to_passes.emplace_back("TransposeTransDataPass", &transpose_transdata_pass); | ||||
names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | |||||
names_to_passes.emplace_back("TransOpSymmetryEliminationPass", &symmetry_elimination_pass); | names_to_passes.emplace_back("TransOpSymmetryEliminationPass", &symmetry_elimination_pass); | ||||
names_to_passes.emplace_back("TransOpNearbyAllreduceFusionPass", &trans_op_nearby_allreduce_fusion_pass); | names_to_passes.emplace_back("TransOpNearbyAllreduceFusionPass", &trans_op_nearby_allreduce_fusion_pass); | ||||
names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | |||||
names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass); | names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass); | ||||
names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | ||||
names_to_passes.emplace_back("DimensionAdjustPass", &dimension_adjust_pass); | names_to_passes.emplace_back("DimensionAdjustPass", &dimension_adjust_pass); | ||||
@@ -2787,11 +2795,18 @@ Status GraphManager::SaveVariables(const Graph &graph, const std::vector<std::st | |||||
GELOGE(FAILED, "Fetch var[%s] value failed.", var_name.c_str()); | GELOGE(FAILED, "Fetch var[%s] value failed.", var_name.c_str()); | ||||
return FAILED; | return FAILED; | ||||
} else { | } else { | ||||
auto var_tensor = var_results[var_name].GetTensorDesc(); | |||||
var_tensor.SetName(var_name); | |||||
var_results[var_name].SetTensorDesc(var_tensor); | |||||
var_values.emplace_back(var_results[var_name]); | var_values.emplace_back(var_results[var_name]); | ||||
} | } | ||||
} | } | ||||
} else { | } else { | ||||
for (auto iter = var_results.begin(); iter != var_results.end(); ++iter) { | for (auto iter = var_results.begin(); iter != var_results.end(); ++iter) { | ||||
string var_name = iter->first; | |||||
auto var_tensor = iter->second.GetTensorDesc(); | |||||
var_tensor.SetName(var_name); | |||||
iter->second.SetTensorDesc(var_tensor); | |||||
var_values.emplace_back(iter->second); | var_values.emplace_back(iter->second); | ||||
} | } | ||||
} | } | ||||
@@ -491,7 +491,7 @@ Status SplitIdentityAlongAnchor(const OutDataAnchorPtr &out_data_anchor, const I | |||||
if (input_rw_type == InputRWType::kScopeWriteable || input_rw_type == InputRWType::kWriteable) { | if (input_rw_type == InputRWType::kScopeWriteable || input_rw_type == InputRWType::kWriteable) { | ||||
auto new_identity = CreateIdentityAfterSrcNode(*pre_node, pre_out_data_anchor->GetIdx()); | auto new_identity = CreateIdentityAfterSrcNode(*pre_node, pre_out_data_anchor->GetIdx()); | ||||
GE_CHECK_NOTNULL(new_identity); | GE_CHECK_NOTNULL(new_identity); | ||||
if (GraphUtils::AddEdge(pre_out_data_anchor, new_identity->GetInDataAnchor(kIdentityAnchorIndex)) != SUCCESS && | |||||
if (GraphUtils::AddEdge(pre_out_data_anchor, new_identity->GetInDataAnchor(kIdentityAnchorIndex)) != SUCCESS || | |||||
GraphUtils::AddEdge(new_identity->GetOutDataAnchor(kIdentityAnchorIndex), peer_in_data_anchor) != SUCCESS) { | GraphUtils::AddEdge(new_identity->GetOutDataAnchor(kIdentityAnchorIndex), peer_in_data_anchor) != SUCCESS) { | ||||
GELOGE(INTERNAL_ERROR, "Failed to insert Identity between node %s and %s", | GELOGE(INTERNAL_ERROR, "Failed to insert Identity between node %s and %s", | ||||
pre_out_data_anchor->GetOwnerNode()->GetName().c_str(), | pre_out_data_anchor->GetOwnerNode()->GetName().c_str(), | ||||
@@ -23,6 +23,7 @@ | |||||
#include <mutex> | #include <mutex> | ||||
#include "common/op/ge_op_utils.h" | #include "common/op/ge_op_utils.h" | ||||
#include "common/util/error_manager/error_manager.h" | |||||
#include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
#include "graph/utils/op_desc_utils.h" | #include "graph/utils/op_desc_utils.h" | ||||
#include "init/gelib.h" | #include "init/gelib.h" | ||||
@@ -82,6 +83,8 @@ Status EnginePlacer::Run() { | |||||
// If can't get op's engine name, keep check support finish and return failed | // If can't get op's engine name, keep check support finish and return failed | ||||
if (engine_name.empty()) { | if (engine_name.empty()) { | ||||
is_check_support_success = false; | is_check_support_success = false; | ||||
ErrorManager::GetInstance().ATCReportErrMessage("E13003", {"opname", "optype"}, | |||||
{op_desc->GetName(), op_desc->GetType()}); | |||||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Can not find engine of op type %s", | GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Can not find engine of op type %s", | ||||
node_ptr->GetOpDesc()->GetType().c_str()); | node_ptr->GetOpDesc()->GetType().c_str()); | ||||
continue; | continue; | ||||
@@ -89,16 +89,13 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { | |||||
nodes.push(node); | nodes.push(node); | ||||
static const std::set<std::string> end_type_set = {STREAMSWITCH, STREAMMERGE, MERGE}; | static const std::set<std::string> end_type_set = {STREAMSWITCH, STREAMMERGE, MERGE}; | ||||
bool merge_flag = false; | |||||
bool exit_flag = false; | |||||
bool net_output_flag = false; | |||||
while (!nodes.empty()) { | while (!nodes.empty()) { | ||||
NodePtr cur_node = nodes.top(); | NodePtr cur_node = nodes.top(); | ||||
nodes.pop(); | nodes.pop(); | ||||
if (visited.count(cur_node) > 0) { | if (visited.count(cur_node) > 0) { | ||||
continue; | continue; | ||||
} | } | ||||
if (AttachFlag(cur_node, stream_label, merge_flag, exit_flag, net_output_flag) != SUCCESS) { | |||||
if (AttachFlag(cur_node, stream_label) != SUCCESS) { | |||||
GELOGE(FAILED, "Attach flag for node %s failed.", cur_node->GetName().c_str()); | GELOGE(FAILED, "Attach flag for node %s failed.", cur_node->GetName().c_str()); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -122,12 +119,6 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { | |||||
GE_CHK_STATUS_RET(SetActiveLabelList(node, {stream_label}), "set active_label_list failed."); | GE_CHK_STATUS_RET(SetActiveLabelList(node, {stream_label}), "set active_label_list failed."); | ||||
} | } | ||||
bool attach_flag = (merge_flag || exit_flag) && net_output_flag; | |||||
if (attach_flag) { | |||||
GELOGI("No need to keep on attaching label."); | |||||
return SUCCESS; | |||||
} | |||||
for (const NodePtr &tmp_node : branch_nodes) { | for (const NodePtr &tmp_node : branch_nodes) { | ||||
GELOGD("Attach label %s to node: %s.", stream_label.c_str(), tmp_node->GetName().c_str()); | GELOGD("Attach label %s to node: %s.", stream_label.c_str(), tmp_node->GetName().c_str()); | ||||
GE_CHK_STATUS_RET(SetStreamLabel(tmp_node, stream_label), "Set stream label failed."); | GE_CHK_STATUS_RET(SetStreamLabel(tmp_node, stream_label), "Set stream label failed."); | ||||
@@ -140,13 +131,9 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { | |||||
/// @brief attach flag | /// @brief attach flag | ||||
/// @param [in] node | /// @param [in] node | ||||
/// @param [out] stream_label | /// @param [out] stream_label | ||||
/// @param [out] merge_flag | |||||
/// @param [out] exit_flag | |||||
/// @param [out] net_output_flag | |||||
/// @return Status | /// @return Status | ||||
/// | /// | ||||
Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag, | |||||
bool &exit_flag, bool &net_output_flag) { | |||||
Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &stream_label) { | |||||
const std::string &type = node->GetType(); | const std::string &type = node->GetType(); | ||||
if (type == STREAMSWITCH) { | if (type == STREAMSWITCH) { | ||||
if (node->GetInDataNodes().empty()) { | if (node->GetInDataNodes().empty()) { | ||||
@@ -164,12 +151,8 @@ Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &strea | |||||
} else if (type == STREAMMERGE) { | } else if (type == STREAMMERGE) { | ||||
stream_label = node->GetName(); | stream_label = node->GetName(); | ||||
GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); | GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); | ||||
merge_flag = true; | |||||
} else if ((type == EXIT) || (type == REFEXIT)) { | } else if ((type == EXIT) || (type == REFEXIT)) { | ||||
GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); | GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); | ||||
exit_flag = true; | |||||
} else if (type == NETOUTPUT) { | |||||
net_output_flag = true; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -50,13 +50,9 @@ class AttachStreamLabelPass : public GraphPass { | |||||
/// @brief attach flag | /// @brief attach flag | ||||
/// @param [in] node | /// @param [in] node | ||||
/// @param [out] stream_label | /// @param [out] stream_label | ||||
/// @param [out] merge_flag | |||||
/// @param [out] exit_flag | |||||
/// @param [out] net_output_flag | |||||
/// @return Status | /// @return Status | ||||
/// | /// | ||||
static Status AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag, bool &exit_flag, | |||||
bool &net_output_flag); | |||||
static Status AttachFlag(const NodePtr &node, std::string &stream_label); | |||||
/// | /// | ||||
/// @brief Update stream_label for loop_branch | /// @brief Update stream_label for loop_branch | ||||
@@ -20,13 +20,14 @@ | |||||
#include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
#include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
namespace { | |||||
const size_t kOutNodesNum = 1; | |||||
} | |||||
namespace ge { | namespace ge { | ||||
Status EnterPass::Run(NodePtr &node) { | Status EnterPass::Run(NodePtr &node) { | ||||
GELOGD("EnterPass running"); | GELOGD("EnterPass running"); | ||||
if (node == nullptr) { | |||||
GELOGE(PARAM_INVALID, "param [node] must not be null."); | |||||
return PARAM_INVALID; | |||||
} | |||||
GE_CHECK_NOTNULL(node); | |||||
if ((node->GetType() != ENTER) && (node->GetType() != REFENTER)) { | if ((node->GetType() != ENTER) && (node->GetType() != REFENTER)) { | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -38,18 +39,17 @@ Status EnterPass::Run(NodePtr &node) { | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
NodePtr in_node = node->GetInDataNodes().at(0); | NodePtr in_node = node->GetInDataNodes().at(0); | ||||
if (in_node == nullptr) { | |||||
GELOGE(PARAM_INVALID, "param [in_node] must not be null"); | |||||
return PARAM_INVALID; | |||||
} | |||||
GE_CHECK_NOTNULL(in_node); | |||||
if ((in_node->GetType() != CONSTANT) && (in_node->GetType() != CONSTANTOP)) { | if ((in_node->GetType() != CONSTANT) && (in_node->GetType() != CONSTANTOP)) { | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
bool need_remove_flag = | |||||
in_node->GetInControlNodes().empty() && node->GetInControlNodes().empty() && node->GetOutDataNodes().empty(); | |||||
if (need_remove_flag) { | |||||
bool need_remove_flag = in_node->GetInControlNodes().empty() && node->GetInControlNodes().empty(); | |||||
if (!need_remove_flag) { | |||||
return SUCCESS; | |||||
} | |||||
if (node->GetOutDataNodes().empty()) { | |||||
for (auto &out_ctrl_node : node->GetOutControlNodes()) { | for (auto &out_ctrl_node : node->GetOutControlNodes()) { | ||||
if (out_ctrl_node == nullptr) { | if (out_ctrl_node == nullptr) { | ||||
continue; | continue; | ||||
@@ -60,9 +60,47 @@ Status EnterPass::Run(NodePtr &node) { | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
} | } | ||||
} else { | |||||
if (OptimizeEnter(node, in_node) != SUCCESS) { | |||||
GELOGE(FAILED, "Optimize enter node[%s] failed.", node->GetName().c_str()); | |||||
return FAILED; | |||||
} | |||||
} | } | ||||
GELOGD("EnterPass success"); | GELOGD("EnterPass success"); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) { | |||||
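// Bypass the Enter node only when it is safe to do so: the Const feeding it has exactly one | |||||
// consumer, the Enter has no control-edge outputs, and none of its data consumers is a Merge. | |||||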
auto out_nodes_of_in_node = in_node->GetOutAllNodes(); | |||||
if (out_nodes_of_in_node.size() != kOutNodesNum) { | |||||
return SUCCESS; | |||||
} | |||||
if (!node->GetOutControlNodes().empty()) { | |||||
return SUCCESS; | |||||
} | |||||
for (const auto &out_node : node->GetOutDataNodes()) { | |||||
GE_CHECK_NOTNULL(out_node); | |||||
if (out_node->GetType() == MERGE) { | |||||
return SUCCESS; | |||||
} | |||||
} | |||||
GE_CHECK_NOTNULL(in_node->GetOutDataAnchor(0)); | |||||
GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0))); | |||||
auto out_data_anchor = node->GetOutDataAnchor(0); | |||||
GE_CHECK_NOTNULL(out_data_anchor); | |||||
for (auto peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { | |||||
GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor)); | |||||
GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor)); | |||||
} | |||||
auto graph = node->GetOwnerComputeGraph(); | |||||
GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, node)) | |||||
AddRePassNodesWithInOut(in_node); | |||||
return SUCCESS; | |||||
} | |||||
} // namespace ge | } // namespace ge |
@@ -23,6 +23,9 @@ namespace ge { | |||||
class EnterPass : public BaseNodePass { | class EnterPass : public BaseNodePass { | ||||
public: | public: | ||||
Status Run(NodePtr &node) override; | Status Run(NodePtr &node) override; | ||||
private: | |||||
Status OptimizeEnter(NodePtr &node, NodePtr &in_node); | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_GRAPH_PASSES_ENTER_PASS_H_ | #endif // GE_GRAPH_PASSES_ENTER_PASS_H_ |
@@ -190,6 +190,10 @@ Status ForPass::FindInputsAndOutputs(const NodePtr &node, std::vector<OutDataAnc | |||||
GELOGE(FAILED, "FindInputWithIndex %s:%u failed: in_data_anchor is NULL.", node->GetName().c_str(), index); | GELOGE(FAILED, "FindInputWithIndex %s:%u failed: in_data_anchor is NULL.", node->GetName().c_str(), index); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
GE_IF_BOOL_EXEC( | |||||
in_data_anchor->GetPeerOutAnchor() == nullptr, | |||||
GELOGW("Get null input by index %d from node %s ", in_data_anchor->GetIdx(), node->GetName().c_str()); | |||||
continue); | |||||
data_inputs.emplace_back(in_data_anchor->GetPeerOutAnchor()); | data_inputs.emplace_back(in_data_anchor->GetPeerOutAnchor()); | ||||
} | } | ||||
@@ -239,7 +239,7 @@ Status MultiBatchClonePass::CreateIndexConstNode(const ComputeGraphPtr &graph, N | |||||
GeTensorDesc const_tensor(GeShape({count}), FORMAT_ND, DT_INT32); | GeTensorDesc const_tensor(GeShape({count}), FORMAT_ND, DT_INT32); | ||||
GeTensor tensor(const_tensor); | GeTensor tensor(const_tensor); | ||||
tensor.SetData(reinterpret_cast<uint8_t *>(addr.get()), count * sizeof(int32_t)); | |||||
(void)tensor.SetData(reinterpret_cast<uint8_t *>(addr.get()), count * sizeof(int32_t)); | |||||
if (!AttrUtils::SetTensor(const_desc, ATTR_NAME_WEIGHTS, tensor)) { | if (!AttrUtils::SetTensor(const_desc, ATTR_NAME_WEIGHTS, tensor)) { | ||||
GELOGE(OUT_OF_MEMORY, "Failed to init tensor value for const %s", const_desc->GetName().c_str()); | GELOGE(OUT_OF_MEMORY, "Failed to init tensor value for const %s", const_desc->GetName().c_str()); | ||||
return FAILED; | return FAILED; | ||||
@@ -50,9 +50,12 @@ Status InsertReshapeIfNeed(const NodePtr &node) { | |||||
GE_CHECK_NOTNULL(src_tensor); | GE_CHECK_NOTNULL(src_tensor); | ||||
for (auto dst_anchor : src_anchor->GetPeerInDataAnchors()) { | for (auto dst_anchor : src_anchor->GetPeerInDataAnchors()) { | ||||
auto dst_node = dst_anchor->GetOwnerNode(); | auto dst_node = dst_anchor->GetOwnerNode(); | ||||
GELOGD("Try insert reshape between %s[%d] and %s[%d] to keep the shape continues", node->GetName().c_str(), | |||||
src_anchor->GetIdx(), dst_node->GetName().c_str(), dst_anchor->GetIdx()); | |||||
GE_CHECK_NOTNULL(dst_node); | GE_CHECK_NOTNULL(dst_node); | ||||
GE_CHECK_NOTNULL(dst_node->GetOpDesc()); | GE_CHECK_NOTNULL(dst_node->GetOpDesc()); | ||||
auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx()); | auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx()); | ||||
GE_CHECK_NOTNULL(dst_tensor); | |||||
bool is_need_insert_reshape = src_tensor->GetShape().GetDims() != UNKNOWN_RANK && | bool is_need_insert_reshape = src_tensor->GetShape().GetDims() != UNKNOWN_RANK && | ||||
dst_tensor->GetShape().GetDims() != UNKNOWN_RANK && | dst_tensor->GetShape().GetDims() != UNKNOWN_RANK && | ||||
src_tensor->GetShape().GetDims() != dst_tensor->GetShape().GetDims(); | src_tensor->GetShape().GetDims() != dst_tensor->GetShape().GetDims(); | ||||
@@ -176,6 +176,9 @@ Status SubgraphPass::WhileInputNodes(const ComputeGraphPtr &graph, const NodePtr | |||||
GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | ||||
NodePtr in_node = peer_out_anchor->GetOwnerNode(); | NodePtr in_node = peer_out_anchor->GetOwnerNode(); | ||||
GE_CHECK_NOTNULL(in_node); | GE_CHECK_NOTNULL(in_node); | ||||
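// Skip memcpy insertion when the While input comes directly from a Variable node. | |||||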
if (in_node->GetType() == VARIABLE || in_node->GetType() == VARHANDLEOP || in_node->GetType() == VARIABLEV2) { | |||||
continue; | |||||
} | |||||
// Input->While and Input link to other nodes need insert memcpy | // Input->While and Input link to other nodes need insert memcpy | ||||
if (peer_out_anchor->GetPeerInDataAnchors().size() > 1) { | if (peer_out_anchor->GetPeerInDataAnchors().size() > 1) { | ||||
GELOGD("Input %s of While %s links to other nodes.", in_node->GetName().c_str(), node->GetName().c_str()); | GELOGD("Input %s of While %s links to other nodes.", in_node->GetName().c_str(), node->GetName().c_str()); | ||||
@@ -124,7 +124,14 @@ Status GetDataDimN(const ge::NodePtr &data_node, ge::Format format, int64_t &bat | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
} | } | ||||
GELOGE(PARAM_INVALID, "when dynamic aipp, shape must be in range [3, 4], but is %zu", shape.size()); | |||||
string errormsg = | |||||
"its shape size must be in range[3,4] which dynamic aipp is linked, " | |||||
"maybe this input is not suitable for dynamic aipp"; | |||||
ErrorManager::GetInstance().ATCReportErrMessage( | |||||
"E10001", {"parameter", "value", "reason"}, | |||||
{data_node->GetName() + " shape size", to_string(shape.size()), errormsg}); | |||||
GELOGE(PARAM_INVALID, "The shape size of this node [%s] which linked dynamic aipp must be in range[3, 4], but is %zu", | |||||
data_node->GetName().c_str(), shape.size()); | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
@@ -272,7 +279,6 @@ Status AippOp::AddAippAttrbutes(const OpDescPtr &op_desc, const std::string &aip | |||||
GE_CHK_BOOL_RET_STATUS(AttrUtils::SetInt(op_desc, kCurrentAippIndex, index), INTERNAL_ERROR, | GE_CHK_BOOL_RET_STATUS(AttrUtils::SetInt(op_desc, kCurrentAippIndex, index), INTERNAL_ERROR, | ||||
"Set kCurrentAippIndex attr for aipp node failed"); | "Set kCurrentAippIndex attr for aipp node failed"); | ||||
// add input/output desc | // add input/output desc | ||||
GeTensorDesc tensor; | GeTensorDesc tensor; | ||||
GE_CHK_GRAPH_STATUS_RET(op_desc->AddInputDesc("images", tensor), "Failed to add input images for aipp node"); | GE_CHK_GRAPH_STATUS_RET(op_desc->AddInputDesc("images", tensor), "Failed to add input images for aipp node"); | ||||
@@ -318,6 +324,7 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr | |||||
GELOGE(PARAM_INVALID, "Get target input node for rank %d failed", rank); | GELOGE(PARAM_INVALID, "Get target input node for rank %d failed", rank); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
data_node_linked_aipp = data_node; | |||||
auto data_opdesc = data_node->GetOpDesc(); | auto data_opdesc = data_node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(data_opdesc); | GE_CHECK_NOTNULL(data_opdesc); | ||||
string set_dt_str; | string set_dt_str; | ||||
@@ -330,10 +337,17 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
// add dynamic or static attr message to data | |||||
if (GetAippMode() == domi::AippOpParams::static_) { | |||||
(void)AttrUtils::SetStr(data_opdesc, ATTR_DATA_RELATED_AIPP_MODE, "static_aipp"); | |||||
} else if (GetAippMode() == domi::AippOpParams::dynamic) { | |||||
(void)AttrUtils::SetStr(data_opdesc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp"); | |||||
} | |||||
// In scenario AIPP+CONV2D+POOLING, keep the aipp info to Data, since AIPP disappear after subgraph optimize | // In scenario AIPP+CONV2D+POOLING, keep the aipp info to Data, since AIPP disappear after subgraph optimize | ||||
GeAttrValue::NAMED_ATTRS aipp_attr; | GeAttrValue::NAMED_ATTRS aipp_attr; | ||||
ConvertParamToAttr(aipp_attr); | ConvertParamToAttr(aipp_attr); | ||||
if (!AttrUtils::SetNamedAttrs(data_node->GetOpDesc(), ATTR_NAME_AIPP, aipp_attr)) { | |||||
if (!AttrUtils::SetNamedAttrs(data_opdesc, ATTR_NAME_AIPP, aipp_attr)) { | |||||
GELOGE(INTERNAL_ERROR, "Set name attrs for Data node failed. id: %d", rank); | GELOGE(INTERNAL_ERROR, "Set name attrs for Data node failed. id: %d", rank); | ||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
@@ -737,7 +751,7 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { | |||||
data_shape_n = data_op_desc->MutableInputDesc(0)->GetShape().GetDim(0); | data_shape_n = data_op_desc->MutableInputDesc(0)->GetShape().GetDim(0); | ||||
} | } | ||||
vector<int64_t> dynamic_aipp_linked_data_shape{data_shape_n, kDynamicDim, kDynamicDim, kDynamicDim}; | vector<int64_t> dynamic_aipp_linked_data_shape{data_shape_n, kDynamicDim, kDynamicDim, kDynamicDim}; | ||||
(void)AttrUtils::SetListInt(data_op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_linked_data_shape); | |||||
(void)AttrUtils::SetListInt(data_op_desc, ATTR_DYNAMIC_AIPP_INPUT_DIMS, dynamic_aipp_linked_data_shape); | |||||
int64_t batch_count = -1; | int64_t batch_count = -1; | ||||
if (GetDataDimN(data_node, ori_data_format, batch_count) != ge::SUCCESS) { | if (GetDataDimN(data_node, ori_data_format, batch_count) != ge::SUCCESS) { | ||||
@@ -759,7 +773,24 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) { | |||||
return AddNodeToGraph(aipp_node, max_dynamic_aipp_size); | return AddNodeToGraph(aipp_node, max_dynamic_aipp_size); | ||||
} | } | ||||
Status AippOp::AddAttrToAippData(const OpDescPtr &aipp_data_op_desc) { | |||||
// Add dynamic aipp config to aipp_data | |||||
GeAttrValue::NAMED_ATTRS aipp_attr; | |||||
ConvertParamToAttr(aipp_attr); | |||||
(void)AttrUtils::SetNamedAttrs(aipp_data_op_desc, ATTR_NAME_AIPP, aipp_attr); | |||||
(void)AttrUtils::SetStr(aipp_data_op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp_conf"); | |||||
// add the node name attr to the Data node linked with this aipp_data, so it can be queried by acl. | |||||
GE_CHECK_NOTNULL(data_node_linked_aipp); | |||||
auto data_op_desc = data_node_linked_aipp->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(data_op_desc); | |||||
(void)AttrUtils::SetStr(data_op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, aipp_data_op_desc->GetName()); | |||||
(void)AttrUtils::SetStr(aipp_data_op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, data_op_desc->GetName()); | |||||
return SUCCESS; | |||||
} | |||||
Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size) { | Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size) { | ||||
static int index = 0; | |||||
std::vector<int64_t> input_shape_dim(1, max_dynamic_aipp_size); | std::vector<int64_t> input_shape_dim(1, max_dynamic_aipp_size); | ||||
GeShape input_shape(input_shape_dim); | GeShape input_shape(input_shape_dim); | ||||
// construct input tensor | // construct input tensor | ||||
@@ -767,18 +798,21 @@ Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp | |||||
TensorUtils::SetReuseInput(input_tensor, false); | TensorUtils::SetReuseInput(input_tensor, false); | ||||
TensorUtils::SetSize(input_tensor, max_dynamic_aipp_size); | TensorUtils::SetSize(input_tensor, max_dynamic_aipp_size); | ||||
// Only flush subgraph name | |||||
const ComputeGraphPtr &graph = aipp_node->GetOwnerComputeGraph(); | const ComputeGraphPtr &graph = aipp_node->GetOwnerComputeGraph(); | ||||
string node_name = (graph->GetParentGraph() == nullptr) ? kDynamicAippData : (graph->GetName() + "_" + node_name); | |||||
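// Name the first dynamic AIPP data node kDynamicAippData and suffix later ones with an increasing index to keep names unique. | |||||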
string node_name; | |||||
if (index == 0) { | |||||
node_name = kDynamicAippData; | |||||
} else { | |||||
node_name = string(kDynamicAippData) + "_" + to_string(index); | |||||
} | |||||
++index; | |||||
// new add aipp_data ops for dynamic aipp param input | // new add aipp_data ops for dynamic aipp param input | ||||
OpDescPtr op_desc_ptr_data = MakeShared<OpDesc>(node_name, AIPPDATA); | OpDescPtr op_desc_ptr_data = MakeShared<OpDesc>(node_name, AIPPDATA); | ||||
GE_CHECK_NOTNULL(op_desc_ptr_data); | GE_CHECK_NOTNULL(op_desc_ptr_data); | ||||
// Add dynamic aipp config to aipp_data | |||||
GeAttrValue::NAMED_ATTRS aipp_attr; | |||||
ConvertParamToAttr(aipp_attr); | |||||
(void)AttrUtils::SetNamedAttrs(op_desc_ptr_data, ATTR_NAME_AIPP, aipp_attr); | |||||
if (AddAttrToAippData(op_desc_ptr_data) != SUCCESS) { | |||||
return INTERNAL_ERROR; | |||||
} | |||||
auto stat1 = op_desc_ptr_data->AddInputDesc(input_tensor); | auto stat1 = op_desc_ptr_data->AddInputDesc(input_tensor); | ||||
@@ -78,9 +78,11 @@ class AippOp : public InsertOpBase { | |||||
Status CreateAippData(const NodePtr &aipp); | Status CreateAippData(const NodePtr &aipp); | ||||
Status AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size); | Status AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size); | ||||
Status AddAippAttrbutes(const OpDescPtr &op_desc, const std::string &aipp_cfg_path, const uint32_t &index); | Status AddAippAttrbutes(const OpDescPtr &op_desc, const std::string &aipp_cfg_path, const uint32_t &index); | ||||
Status AddAttrToAippData(const OpDescPtr &aipp_data_op_desc); | |||||
domi::AippOpParams *aipp_params_ = nullptr; | domi::AippOpParams *aipp_params_ = nullptr; | ||||
ge::NodePtr aipp_node_ = nullptr; | ge::NodePtr aipp_node_ = nullptr; | ||||
ge::NodePtr data_node_linked_aipp = nullptr; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
@@ -22,6 +22,7 @@ | |||||
#include "common/ge/ge_util.h" | #include "common/ge/ge_util.h" | ||||
#include "common/op/ge_op_utils.h" | #include "common/op/ge_op_utils.h" | ||||
#include "common/util.h" | #include "common/util.h" | ||||
#include "common/util/error_manager/error_manager.h" | |||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
#include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
#include "framework/common/ge_inner_error_codes.h" | #include "framework/common/ge_inner_error_codes.h" | ||||
@@ -120,15 +121,15 @@ Status InsertNewOpUtil::CheckPositionNotRepeat() { | |||||
for (int j = i + 1; j < insert_op_conf_->aipp_op_size(); j++) { | for (int j = i + 1; j < insert_op_conf_->aipp_op_size(); j++) { | ||||
const domi::AippOpParams *another_item = insert_op_conf_->mutable_aipp_op(j); | const domi::AippOpParams *another_item = insert_op_conf_->mutable_aipp_op(j); | ||||
GE_IF_BOOL_EXEC(item->related_input_rank() != another_item->related_input_rank(), continue;); | |||||
GE_IF_BOOL_EXEC( | |||||
item->input_edge_idx_size() == 0 || another_item->input_edge_idx_size() == 0 || | |||||
item->input_edge_idx(0) == another_item->input_edge_idx(0), | |||||
GELOGE(PARAM_INVALID, | |||||
"Can not insert aipp op to the same postion! please check related_input_rank and input_edge_idx."); | |||||
return PARAM_INVALID;); | |||||
GE_IF_BOOL_EXEC(item->related_input_rank() == another_item->related_input_rank(), | |||||
string errormsg = | |||||
"Can not insert aipp to the same postion! Please ensure related_input_rank" | |||||
" param is different in different aipp config."; | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); | |||||
GELOGE(PARAM_INVALID, | |||||
"Can not insert aipp op to the same postion! Please ensure related_input_rank param " | |||||
"is different in different aipp config."); | |||||
return PARAM_INVALID;); | |||||
} | } | ||||
} | } | ||||
@@ -162,18 +163,12 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { | |||||
std::unique_ptr<domi::AippOpParams> aippParams(new (std::nothrow) domi::AippOpParams()); | std::unique_ptr<domi::AippOpParams> aippParams(new (std::nothrow) domi::AippOpParams()); | ||||
GE_CHECK_NOTNULL(aippParams); | GE_CHECK_NOTNULL(aippParams); | ||||
GE_IF_BOOL_EXEC(aippNodes.size() > 0, GE_CHK_STATUS(GetAippParams(aippParams, aippNodes[0])); | |||||
aippMode = (aippMode == domi::AippOpParams::undefined) ? aippParams->aipp_mode() : aippMode; | |||||
GE_CHK_BOOL_RET_STATUS(aippMode == aippParams->aipp_mode(), PARAM_INVALID, | |||||
"The aipp_mode of all aipp_op must be the same");); | |||||
GE_IF_BOOL_EXEC( | GE_IF_BOOL_EXEC( | ||||
aippNodes.size() > 1, for (decltype(aippNodes)::size_type i = 1; i < aippNodes.size(); i++) { | aippNodes.size() > 1, for (decltype(aippNodes)::size_type i = 1; i < aippNodes.size(); i++) { | ||||
std::unique_ptr<domi::AippOpParams> currAippParam(new (std::nothrow) domi::AippOpParams()); | std::unique_ptr<domi::AippOpParams> currAippParam(new (std::nothrow) domi::AippOpParams()); | ||||
GE_CHECK_NOTNULL(currAippParam); | GE_CHECK_NOTNULL(currAippParam); | ||||
GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i])); | GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i])); | ||||
GE_CHK_BOOL_RET_STATUS(aippMode == currAippParam->aipp_mode(), PARAM_INVALID, | |||||
"The aipp_mode of all aipp_op must be the same"); | |||||
if (aippMode == domi::AippOpParams::static_) { | if (aippMode == domi::AippOpParams::static_) { | ||||
GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, | GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, | ||||
"The input_format of all aipp_ops after one Data should be the same"); | "The input_format of all aipp_ops after one Data should be the same"); | ||||
@@ -113,10 +113,9 @@ NodePtr InsertCopyNode(const NodePtr &node, size_t n) { | |||||
desc->CopyAttrsFrom(*src_op_desc); | desc->CopyAttrsFrom(*src_op_desc); | ||||
for (uint32_t i = 0; i < node->GetAllInDataAnchorsSize(); ++i) { | for (uint32_t i = 0; i < node->GetAllInDataAnchorsSize(); ++i) { | ||||
auto input_desc = desc->MutableInputDesc(i); | auto input_desc = desc->MutableInputDesc(i); | ||||
GE_IF_BOOL_EXEC(input_desc == nullptr, | |||||
GELOGE(INTERNAL_ERROR, "Failed to get input desc by index %u from node %s when copy from %s", i, | |||||
desc->GetName().c_str(), node->GetName().c_str()); | |||||
return nullptr); | |||||
GE_IF_BOOL_EXEC(input_desc == nullptr, GELOGW("Get null input desc by index %u from node %s when copy from %s", i, | |||||
desc->GetName().c_str(), node->GetName().c_str()); | |||||
continue); | |||||
input_desc->CopyAttrsFrom(src_op_desc->GetInputDesc(i)); | input_desc->CopyAttrsFrom(src_op_desc->GetInputDesc(i)); | ||||
} | } | ||||
@@ -991,12 +990,17 @@ Status MultiBatchGraphCopyer::InsertIdentityAfterSwitchN() { | |||||
size_t i = 0; | size_t i = 0; | ||||
for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { | for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { | ||||
for (auto &in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { | for (auto &in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { | ||||
auto identity_desc = MakeShared<OpDesc>(node->GetName() + "_identity_" + std::to_string(i), IDENTITY); | |||||
GE_CHECK_NOTNULL(identity_desc); | |||||
auto out_node = in_data_anchor->GetOwnerNode(); | auto out_node = in_data_anchor->GetOwnerNode(); | ||||
auto op_desc = out_node->GetOpDesc(); | auto op_desc = out_node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
if ((out_node->GetType() == MERGE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) { | |||||
GELOGD("No need to insert identity between %s and %s.", node->GetName().c_str(), out_node->GetName().c_str()); | |||||
continue; | |||||
} | |||||
auto identity_desc = MakeShared<OpDesc>(node->GetName() + "_identity_" + std::to_string(i), IDENTITY); | |||||
GE_CHECK_NOTNULL(identity_desc); | |||||
string batch_label; | string batch_label; | ||||
if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { | if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { | ||||
if (!AttrUtils::SetStr(identity_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { | if (!AttrUtils::SetStr(identity_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { | ||||
@@ -1159,7 +1163,7 @@ void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node, s | |||||
} | } | ||||
} | } | ||||
// Connect NetOutput directly: DTS2020070612498 | |||||
// Connect NetOutput directly | |||||
void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, const set<size_t> &dynamic_output_index, | void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, const set<size_t> &dynamic_output_index, | ||||
vector<string> &dynamic_output_dims) { | vector<string> &dynamic_output_dims) { | ||||
GELOGD("Try get directly shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); | GELOGD("Try get directly shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); | ||||
@@ -40,7 +40,7 @@ include ${BUILD_HOST_SHARED_LIBRARY} | |||||
include $(CLEAR_VARS) | include $(CLEAR_VARS) | ||||
LOCAL_MODULE := atclib/libhost_cpu_engine | LOCAL_MODULE := atclib/libhost_cpu_engine | ||||
LOCAL_CFLAGS += -Werror | LOCAL_CFLAGS += -Werror | ||||
LOCAL_CFLAGS += -std=c++11 | |||||
LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE | |||||
LOCAL_LDFLAGS := | LOCAL_LDFLAGS := | ||||
LOCAL_STATIC_LIBRARIES := | LOCAL_STATIC_LIBRARIES := | ||||
@@ -16,131 +16,262 @@ | |||||
#include "host_kernels/strided_slice_kernel.h" | #include "host_kernels/strided_slice_kernel.h" | ||||
#include <memory> | |||||
#include "common/fp16_t.h" | #include "common/fp16_t.h" | ||||
#include "common/ge_inner_error_codes.h" | #include "common/ge_inner_error_codes.h" | ||||
#include "common/math/math_util.h" | #include "common/math/math_util.h" | ||||
#include "common/op/ge_op_utils.h" | #include "common/op/ge_op_utils.h" | ||||
#include "external/graph/types.h" | |||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
#include "host_kernels/kernel_utils.h" | |||||
#include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
#include "host_kernels/kernel_utils.h" | |||||
#include "inc/kernel_factory.h" | #include "inc/kernel_factory.h" | ||||
#include <memory> | |||||
namespace ge { | namespace ge { | ||||
namespace { | namespace { | ||||
const int32_t kNumOne = 1; | const int32_t kNumOne = 1; | ||||
const size_t kStridedSliceInputSize = 4; | const size_t kStridedSliceInputSize = 4; | ||||
const size_t kStridedSliceInputIndex0 = 0; | |||||
const size_t kStridedSliceInputIndex1 = 1; | |||||
const size_t kStridedSliceInputIndex2 = 2; | |||||
const size_t kStridedSliceInputIndex3 = 3; | |||||
const int32_t kDefaultSrideSize = 1; | |||||
} // namespace | |||||
Status StridedSliceKernel::CheckAndGetAttr(const OpDescPtr &attr, const std::vector<ConstGeTensorPtr> &input, | |||||
Attr &args) { | |||||
int64_t begin_mask = 0; | |||||
int64_t end_mask = 0; | |||||
int64_t ellipsis_mask = 0; | |||||
int64_t new_axis_mask = 0; | |||||
int64_t shrink_axis_mask = 0; | |||||
const size_t kStridedSliceInputIndex = 0; | |||||
const size_t kStridedSliceBeginIndex = 1; | |||||
const size_t kStridedSliceEndIndex = 2; | |||||
const size_t kStridedSliceStrideIndex = 3; | |||||
const int32_t kDefaultStrideSize = 1; | |||||
const std::set<DataType> kIndexNumberType = {DT_INT32, DT_INT64}; | |||||
if (attr == nullptr) { | |||||
GELOGW("input opdescptr is nullptr."); | |||||
return PARAM_INVALID; | |||||
bool IsEllipsisMaskValid(const GeTensorDescPtr &input_desc, const int ellipsis_mask) { | |||||
if (ellipsis_mask != 0) { | |||||
auto ellipsis_num = 0; | |||||
auto input_shape = input_desc->GetShape(); | |||||
bool ellipsis_mask_flag = false; | |||||
for (size_t i = 0; i < input_shape.GetDimNum(); i++) { | |||||
uint32_t i_temp = static_cast<uint32_t>(i); | |||||
ellipsis_mask_flag = (static_cast<uint32_t>(ellipsis_mask) & (1 << i_temp)); | |||||
if (ellipsis_mask_flag) { | |||||
++ellipsis_num; | |||||
} | |||||
if (ellipsis_num > 1) { | |||||
GELOGW("Only one non-zero bit is allowed in ellipsis_mask."); | |||||
return false; | |||||
} | |||||
} | |||||
} | } | ||||
if (input.size() != kStridedSliceInputSize) { | |||||
GELOGW("The number of input for strided slice must be %zu.", kStridedSliceInputSize); | |||||
return PARAM_INVALID; | |||||
return true; | |||||
} | |||||
} // namespace | |||||
Status StridedSliceKernel::Compute(const ge::OpDescPtr attr, const std::vector<ge::ConstGeTensorPtr> &input, | |||||
vector<ge::GeTensorPtr> &v_output) { | |||||
GELOGD("StridedSliceKernel in."); | |||||
// 1.Check input and attrs | |||||
if (CheckAndGetAttr(attr) != SUCCESS) { | |||||
GELOGW("Check and get attrs failed.Ignore kernel."); | |||||
return NOT_CHANGED; | |||||
} | } | ||||
if (!AttrUtils::GetInt(attr, STRIDE_SLICE_ATTR_BEGIN_MASK, begin_mask)) { | |||||
GELOGW("get begin_mask attr failed."); | |||||
return PARAM_INVALID; | |||||
if (CheckInputParam(input) != SUCCESS) { | |||||
GELOGW("Check input params failed.Ignore kernel."); | |||||
return NOT_CHANGED; | |||||
} | } | ||||
if (!AttrUtils::GetInt(attr, STRIDE_SLICE_ATTR_END_MASK, end_mask)) { | |||||
GELOGW("get end_mask attr failed."); | |||||
return PARAM_INVALID; | |||||
// 2.Init param with mask attrs. | |||||
std::vector<int64_t> input_dims; | |||||
std::vector<int64_t> begin_vec; | |||||
std::vector<int64_t> output_dims; | |||||
std::vector<int64_t> stride_vec; | |||||
if (InitParamWithAttrs(input, input_dims, begin_vec, output_dims, stride_vec) != SUCCESS) { | |||||
GELOGW("Init param with mask attrs failed.Ignore kernel."); | |||||
return NOT_CHANGED; | |||||
} | } | ||||
if (!AttrUtils::GetInt(attr, STRIDE_SLICE_ATTR_ELLIPSIS_MASK, ellipsis_mask)) { | |||||
GELOGW("get ellipsis_mask attr failed."); | |||||
return PARAM_INVALID; | |||||
// 3.Set sliced data to output_ptr | |||||
ConstGeTensorPtr weight0 = input[kStridedSliceInputIndex]; | |||||
auto data_type = weight0->GetTensorDesc().GetDataType(); | |||||
size_t data_size = weight0->GetData().size() / GetSizeByDataType(data_type); | |||||
void *data = reinterpret_cast<void *>(const_cast<uint8_t *>(weight0->GetData().data())); | |||||
GE_CHECK_NOTNULL(data); | |||||
// Index 0 can always get a GeTensorDesc object from any OpDescPtr. | |||||
auto output_tensor_desc = attr->GetOutputDesc(0); | |||||
GeTensorPtr output_ptr = MakeShared<GeTensor>(output_tensor_desc); | |||||
if (output_ptr == nullptr) { | |||||
GELOGE(MEMALLOC_FAILED, "MakeShared GeTensor failed, node name %s.", attr->GetName().c_str()); | |||||
return NOT_CHANGED; | |||||
} | } | ||||
if (!AttrUtils::GetInt(attr, STRIDE_SLICE_ATTR_NEW_AXIS_MASK, new_axis_mask)) { | |||||
GELOGW("get new_axis_mask attr failed."); | |||||
return PARAM_INVALID; | |||||
auto ret = OpUtils::SetOutputSliceData(data, static_cast<int64_t>(data_size), data_type, input_dims, begin_vec, | |||||
output_dims, output_ptr.get(), stride_vec); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(INTERNAL_ERROR, "SetOutputSliceData failed."); | |||||
return NOT_CHANGED; | |||||
} | } | ||||
if (!AttrUtils::GetInt(attr, STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK, shrink_axis_mask)) { | |||||
GELOGW("get shrink_axis_mask attr failed."); | |||||
// 4.Set output data_type and shape | |||||
GeTensorDesc &t_d = output_ptr->MutableTensorDesc(); | |||||
t_d.SetDataType(static_cast<DataType>(data_type)); | |||||
auto final_dim_size = static_cast<uint32_t>(output_dims.size()); | |||||
vector<int64_t> v_dims; | |||||
GetOutputDims(final_dim_size, output_dims, v_dims); | |||||
t_d.SetShape(GeShape(v_dims)); | |||||
v_output.push_back(output_ptr); | |||||
GELOGI("StridedSliceKernel success."); | |||||
return SUCCESS; | |||||
} | |||||
Status StridedSliceKernel::CheckAndGetAttr(const OpDescPtr &attr) { | |||||
if (attr == nullptr) { | |||||
GELOGE(PARAM_INVALID, "input opdescptr is nullptr."); | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
if ((ellipsis_mask != 0) || (new_axis_mask != 0)) { | |||||
GELOGW("ellipsis_mask or new_axis_mask must be 0 with optimizer."); | |||||
return NOT_CHANGED; | |||||
// Get all op attr value of strided_slice | |||||
for (auto &attr_2_value : attr_value_map_) { | |||||
if (!AttrUtils::GetInt(attr, attr_2_value.first, attr_2_value.second)) { | |||||
GELOGE(PARAM_INVALID, "Get %s attr failed.", attr_2_value.first.c_str()); | |||||
return PARAM_INVALID; | |||||
} | |||||
} | } | ||||
const auto &input_desc = attr->MutableInputDesc(kStridedSliceInputIndex0); | |||||
// Check ellipsis_mask is valid | |||||
const auto &input_desc = attr->MutableInputDesc(kStridedSliceInputIndex); | |||||
GE_CHECK_NOTNULL(input_desc); | GE_CHECK_NOTNULL(input_desc); | ||||
DataType data_type = input_desc->GetDataType(); | |||||
if ((data_type != DT_FLOAT) && (data_type != DT_INT32)) { | |||||
GELOGW( | |||||
"Data type of StridedSlice OP must be float or int32." | |||||
"Constant folding will not be carried out in this condition" | |||||
"which might affect the time performance but not the accuracy"); | |||||
} | |||||
args.begin_mask = begin_mask; | |||||
args.end_mask = end_mask; | |||||
args.ellipsis_mask = ellipsis_mask; | |||||
args.new_axis_mask = new_axis_mask; | |||||
args.data_type = static_cast<int64_t>(data_type); | |||||
args.shrink_axis_mask = shrink_axis_mask; | |||||
ConstGeTensorPtr weight0 = input[kStridedSliceInputIndex0]; | |||||
ConstGeTensorPtr weight1 = input[kStridedSliceInputIndex1]; | |||||
ConstGeTensorPtr weight2 = input[kStridedSliceInputIndex2]; | |||||
ConstGeTensorPtr weight3 = input[kStridedSliceInputIndex3]; | |||||
if (CheckWeight(weight0, weight1, weight2, weight3) != SUCCESS) { | |||||
GELOGW("Check And Get Attr failed."); | |||||
auto ellipsis_mask = attr_value_map_.at(STRIDE_SLICE_ATTR_ELLIPSIS_MASK); | |||||
if (!IsEllipsisMaskValid(input_desc, ellipsis_mask)) { | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status StridedSliceKernel::CheckWeight(const ConstGeTensorPtr &weight0, const ConstGeTensorPtr &weight1, | |||||
const ConstGeTensorPtr &weight2, const ConstGeTensorPtr &weight3) const { | |||||
if ((weight0 == nullptr) || (weight1 == nullptr) || (weight2 == nullptr) || (weight3 == nullptr)) { | |||||
GELOGW("weight is nullptr."); | |||||
Status StridedSliceKernel::CheckInputParam(const std::vector<ConstGeTensorPtr> &input) const { | |||||
if (input.size() != kStridedSliceInputSize) { | |||||
GELOGE(PARAM_INVALID, "The number of input for strided slice must be %zu.", kStridedSliceInputSize); | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
if (!(weight1->GetTensorDesc().GetDataType() == DT_INT32 && weight2->GetTensorDesc().GetDataType() == DT_INT32 && | |||||
weight3->GetTensorDesc().GetDataType() == DT_INT32)) { | |||||
GELOGE(INTERNAL_ERROR, "Data type of StridedSlice OP(begin,end,strides) must be int32."); | |||||
return INTERNAL_ERROR; | |||||
ConstGeTensorPtr weight0 = input[kStridedSliceInputIndex]; | |||||
ConstGeTensorPtr begin_tensor = input[kStridedSliceBeginIndex]; | |||||
ConstGeTensorPtr end_tensor = input[kStridedSliceEndIndex]; | |||||
ConstGeTensorPtr stride_tensor = input[kStridedSliceStrideIndex]; | |||||
GE_CHECK_NOTNULL(weight0); | |||||
GE_CHECK_NOTNULL(begin_tensor); | |||||
GE_CHECK_NOTNULL(end_tensor); | |||||
GE_CHECK_NOTNULL(stride_tensor); | |||||
// check if begin,end,strides data type is supported | |||||
auto begin_tensor_desc = begin_tensor->GetTensorDesc(); | |||||
auto end_tensor_desc = end_tensor->GetTensorDesc(); | |||||
auto stride_tensor_desc = stride_tensor->GetTensorDesc(); | |||||
if (begin_tensor_desc.GetDataType() != end_tensor_desc.GetDataType() || | |||||
end_tensor_desc.GetDataType() != stride_tensor_desc.GetDataType()) { | |||||
GELOGW("Data type of StridedSlice OP(begin,end,strides) must be same."); | |||||
return PARAM_INVALID; | |||||
} | |||||
if (kIndexNumberType.find(begin_tensor_desc.GetDataType()) == kIndexNumberType.end()) { | |||||
GELOGW("Data type of StridedSlice OP(begin,end,strides) must be int32 or int64."); | |||||
return PARAM_INVALID; | |||||
} | } | ||||
// check data | // check data | ||||
size_t weight0_size = weight0->GetData().size() / sizeof(int32_t); | |||||
size_t weight1_size = weight1->GetData().size() / sizeof(int32_t); | |||||
size_t weight2_size = weight2->GetData().size() / sizeof(int32_t); | |||||
size_t weight3_size = weight3->GetData().size() / sizeof(int32_t); | |||||
if ((weight0_size == 0) || (weight1_size == 0) || (weight2_size == 0) || (weight3_size == 0)) { | |||||
auto x_data_type = weight0->GetTensorDesc().GetDataType(); | |||||
auto x_data_size = GetSizeByDataType(x_data_type); | |||||
if (x_data_size < 0) { | |||||
GELOGW("Data type of x input %s is not supported.", TypeUtils::DataTypeToSerialString(x_data_type).c_str()); | |||||
return PARAM_INVALID; | |||||
} | |||||
size_t weight0_size = weight0->GetData().size() / x_data_size; | |||||
size_t begin_data_size = begin_tensor->GetData().size() / sizeof(int32_t); | |||||
size_t end_data_size = end_tensor->GetData().size() / sizeof(int32_t); | |||||
size_t stride_data_size = stride_tensor->GetData().size() / sizeof(int32_t); | |||||
if ((weight0_size == 0) || (begin_data_size == 0) || (end_data_size == 0) || (stride_data_size == 0)) { | |||||
GELOGW("Data size of inputs is 0."); | GELOGW("Data size of inputs is 0."); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
// check dim size | // check dim size | ||||
size_t weight0_dim_size = weight0->GetTensorDesc().GetShape().GetDimNum(); | |||||
if (!((weight0_dim_size >= weight1_size) && (weight1_size == weight2_size) && (weight1_size == weight3_size))) { | |||||
if (!((begin_data_size == end_data_size) && (end_data_size == stride_data_size))) { | |||||
GELOGW("The sizes of begin, end and stride is not supported."); | GELOGW("The sizes of begin, end and stride is not supported."); | ||||
return NOT_CHANGED; | |||||
return PARAM_INVALID; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status StridedSliceKernel::MaskCal(const bool &begin_mask_flag, const bool &end_mask_flag, const bool &shrink_mask_flag, | |||||
int32_t &begin_i, int32_t &end_i, int32_t &dim_i) const { | |||||
Status StridedSliceKernel::InitParamWithAttrs(const std::vector<ConstGeTensorPtr> &input, | |||||
std::vector<int64_t> &input_dims, std::vector<int64_t> &begin_vec, | |||||
std::vector<int64_t> &output_dims, std::vector<int64_t> &stride_vec) { | |||||
ConstGeTensorPtr weight0 = input[kStridedSliceInputIndex]; | |||||
ConstGeTensorPtr begin_tensor = input[kStridedSliceBeginIndex]; | |||||
ConstGeTensorPtr end_tensor = input[kStridedSliceEndIndex]; | |||||
ConstGeTensorPtr stride_tensor = input[kStridedSliceStrideIndex]; | |||||
const GeShape x_shape = weight0->GetTensorDesc().GetShape(); | |||||
auto x_dims = x_shape.GetDims(); | |||||
auto x_dims_num = x_shape.GetDimNum(); | |||||
// handle new_axis_mask | |||||
ExpandDimsWithNewAxis(begin_tensor, x_dims_num, x_dims); | |||||
const int32_t *begin = reinterpret_cast<const int32_t *>(begin_tensor->GetData().data()); | |||||
const int32_t *end = reinterpret_cast<const int32_t *>(end_tensor->GetData().data()); | |||||
const int32_t *stride = reinterpret_cast<const int32_t *>(stride_tensor->GetData().data()); | |||||
auto begin_dim_num = begin_tensor->GetData().size() / sizeof(int32_t); | |||||
auto min_dim = x_dims_num > begin_dim_num ? begin_dim_num : x_dims_num; | |||||
for (size_t i = 0; i < x_dims.size(); ++i) { | |||||
auto i_temp = static_cast<uint64_t>(i); | |||||
bool new_axis_mask_flag = | |||||
(static_cast<uint64_t>(attr_value_map_.at(STRIDE_SLICE_ATTR_NEW_AXIS_MASK)) & (1 << i_temp)); | |||||
if (new_axis_mask_flag) { | |||||
output_dims.push_back(1); | |||||
input_dims.push_back(1); | |||||
begin_vec.push_back(0); | |||||
stride_vec.push_back(1); | |||||
continue; | |||||
} | |||||
int64_t begin_i = 0; | |||||
int64_t end_i = 0; | |||||
int64_t stride_i = 1; | |||||
if (i < min_dim) { | |||||
begin_i = begin[i]; | |||||
end_i = end[i]; | |||||
stride_i = stride[i]; | |||||
} else { | |||||
begin_i = 0; | |||||
end_i = x_dims.at(i); | |||||
stride_i = 1; | |||||
} | |||||
GELOGD("Before mask calculate. Begin is : %d\t,end is : %d\t stride is : %d\t x_dim_i is : %d.", begin_i, end_i, | |||||
stride_i, x_dims.at(i)); | |||||
auto ret = MaskCal(i, begin_i, end_i, x_dims.at(i)); | |||||
if (ret != SUCCESS) { | |||||
GELOGW("MaskCal failed, because of data overflow."); | |||||
return NOT_CHANGED; | |||||
} | |||||
int64_t dim_final; | |||||
GELOGD("Before stride calculate. Begin is : %d\t,end is : %d\t stride is : %d\t x_dim_i is : %d.", begin_i, end_i, | |||||
stride_i, x_dims.at(i)); | |||||
(void)StrideCal(x_dims.at(i), begin_i, end_i, stride_i, dim_final); | |||||
output_dims.push_back(dim_final); | |||||
input_dims.push_back(x_dims.at(i)); | |||||
begin_vec.push_back(begin_i); | |||||
stride_vec.push_back(stride_i); | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
void StridedSliceKernel::ExpandDimsWithNewAxis(const ConstGeTensorPtr &begin_tensor, const size_t x_dims_num, | |||||
vector<int64_t> &x_dims) { | |||||
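// For every bit set in new_axis_mask, insert a dimension of size 1 into x_dims at that axis. | |||||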
auto begin_data_type_size = GetSizeByDataType(begin_tensor->GetTensorDesc().GetDataType()); | |||||
size_t begin_vec_size = begin_tensor->GetData().size() / begin_data_type_size; | |||||
auto final_dim_num = x_dims_num < begin_vec_size ? begin_vec_size : x_dims_num; | |||||
for (size_t i = 0; i < final_dim_num; i++) { | |||||
auto i_temp = static_cast<uint64_t>(i); | |||||
bool new_axis_mask_flag = | |||||
(static_cast<uint64_t>(attr_value_map_.at(STRIDE_SLICE_ATTR_NEW_AXIS_MASK)) & (1 << i_temp)); | |||||
if (new_axis_mask_flag) { | |||||
x_dims.insert(x_dims.begin() + i, 1); | |||||
} | |||||
} | |||||
} | |||||
Status StridedSliceKernel::MaskCal(const size_t i, int64_t &begin_i, int64_t &end_i, int64_t &dim_i) const { | |||||
uint64_t i_temp = static_cast<uint64_t>(i); | |||||
bool begin_mask_flag = (static_cast<uint64_t>(attr_value_map_.at(STRIDE_SLICE_ATTR_BEGIN_MASK)) & (1 << i_temp)); | |||||
bool end_mask_flag = (static_cast<uint64_t>(attr_value_map_.at(STRIDE_SLICE_ATTR_END_MASK)) & (1 << i_temp)); | |||||
bool ellipsis_mask_flag = | |||||
(static_cast<uint64_t>(attr_value_map_.at(STRIDE_SLICE_ATTR_ELLIPSIS_MASK)) & (1 << i_temp)); | |||||
bool shrink_mask_flag = | |||||
(static_cast<uint32_t>(attr_value_map_.at(STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK)) & (1 << i_temp)); | |||||
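// shrink_axis_mask collapses this axis to the single element at begin; begin_mask/end_mask widen the slice start/end to cover the whole axis. | |||||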
if (shrink_mask_flag) { | if (shrink_mask_flag) { | ||||
begin_i = (begin_i < 0 ? (dim_i + begin_i) : begin_i); | begin_i = (begin_i < 0 ? (dim_i + begin_i) : begin_i); | ||||
FMK_INT32_ADDCHECK(begin_i, kNumOne); | |||||
FMK_INT32_ADDCHECK(begin_i, kNumOne) | |||||
end_i = begin_i + kNumOne; | end_i = begin_i + kNumOne; | ||||
} else { | } else { | ||||
if (begin_mask_flag) { | if (begin_mask_flag) { | ||||
@@ -153,130 +284,43 @@ Status StridedSliceKernel::MaskCal(const bool &begin_mask_flag, const bool &end_ | |||||
} else { | } else { | ||||
end_i = (end_i < 0 ? (dim_i + end_i) : end_i); | end_i = (end_i < 0 ? (dim_i + end_i) : end_i); | ||||
} | } | ||||
if (ellipsis_mask_flag) { | |||||
begin_i = 0; | |||||
end_i = dim_i; | |||||
} | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status StridedSliceKernel::StrideCal(const int64_t x_dims_i, int64_t &begin_i, int64_t &end_i, int64_t &stride_i, | |||||
int64_t &dim_final) const { | |||||
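// A stride of 0 falls back to the default stride of 1; a negative stride walks the axis in reverse, so begin and end are mirrored. | |||||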
if (stride_i == 0) { | |||||
stride_i = kDefaultStrideSize; | |||||
} else if (stride_i < 0) { | |||||
stride_i = -stride_i; | |||||
begin_i = x_dims_i - begin_i - 1; | |||||
end_i = x_dims_i - end_i - 1; | |||||
} | |||||
void StridedSliceKernel::GetOutputDims(uint32_t dims_size, const std::vector<int64_t> &output_dims, const Attr &args, | |||||
if (end_i > x_dims_i) { | |||||
end_i = x_dims_i; | |||||
} | |||||
if ((begin_i == 0) && (end_i == 0)) { | |||||
dim_final = x_dims_i; | |||||
} else { | |||||
dim_final = abs(end_i - begin_i) / stride_i; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
void StridedSliceKernel::GetOutputDims(uint32_t dims_size, const std::vector<int64_t> &output_dims, | |||||
vector<int64_t> &v_dims) { | vector<int64_t> &v_dims) { | ||||
for (uint32_t k = 0; k < dims_size; k++) { | for (uint32_t k = 0; k < dims_size; k++) { | ||||
bool shrink_mask_i = (static_cast<uint32_t>(args.shrink_axis_mask) & (1 << k)); | |||||
bool shrink_mask_i = (static_cast<uint32_t>(attr_value_map_.at(STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK)) & (1 << k)); | |||||
if (shrink_mask_i) { | if (shrink_mask_i) { | ||||
continue; | continue; | ||||
} | } | ||||
v_dims.push_back(output_dims[k]); | v_dims.push_back(output_dims[k]); | ||||
} | } | ||||
} | } | ||||
Status StridedSliceKernel::CheckOutputDims(const std::vector<int64_t> &output_dims, const OpDescPtr attr) { | |||||
// check dim not all less than 0 | |||||
for (auto dim : output_dims) { | |||||
if (dim > 0) { | |||||
return SUCCESS; | |||||
} | |||||
} | |||||
GELOGW("all output dim <=0, can't be processed. op_name : %s", attr->GetName().c_str()); | |||||
return NOT_CHANGED; | |||||
} | |||||
Status StridedSliceKernel::Compute(const ge::OpDescPtr attr, const std::vector<ge::ConstGeTensorPtr> &input, | |||||
vector<ge::GeTensorPtr> &v_output) { | |||||
GELOGI("StridedSliceKernel in."); | |||||
Attr args; | |||||
Status ret = CheckAndGetAttr(attr, input, args); | |||||
if (ret != SUCCESS) { | |||||
GELOGW("Check And Get Attr failed."); | |||||
return NOT_CHANGED; | |||||
} | |||||
ConstGeTensorPtr weight0 = input[kStridedSliceInputIndex0]; | |||||
ConstGeTensorPtr weight1 = input[kStridedSliceInputIndex1]; | |||||
ConstGeTensorPtr weight2 = input[kStridedSliceInputIndex2]; | |||||
ConstGeTensorPtr weight3 = input[kStridedSliceInputIndex3]; | |||||
const GeShape x_shape = weight0->GetTensorDesc().GetShape(); | |||||
size_t dim_size = x_shape.GetDimNum(); | |||||
size_t data_size = weight0->GetData().size() / sizeof(int32_t); | |||||
const int32_t *begin = reinterpret_cast<const int32_t *>(weight1->GetData().data()); | |||||
const int32_t *end = reinterpret_cast<const int32_t *>(weight2->GetData().data()); | |||||
const int32_t *stride = reinterpret_cast<const int32_t *>(weight3->GetData().data()); | |||||
if ((begin == nullptr) || (end == nullptr) || (stride == nullptr)) { | |||||
GELOGW("input weight tensor is nullptr."); | |||||
return NOT_CHANGED; | |||||
} | |||||
std::vector<int64_t> input_dims; | |||||
std::vector<int64_t> begin_vec; | |||||
std::vector<int64_t> output_dims; | |||||
std::vector<int64_t> stride_vec; | |||||
int64_t dim_final; | |||||
for (size_t i = 0; i < dim_size; i++) { | |||||
int32_t begin_i = begin[i]; | |||||
int32_t end_i = end[i]; | |||||
int32_t stride_i = stride[i]; | |||||
int32_t dim_i = static_cast<int32_t>(x_shape.GetDim(i)); | |||||
GELOGI("%d\t %d\t %d\t %d", begin_i, end_i, stride_i, dim_i); | |||||
uint32_t i_temp = static_cast<uint32_t>(i); | |||||
bool begin_mask_i = (static_cast<uint32_t>(args.begin_mask) & (1 << i_temp)); | |||||
bool end_mask_i = (static_cast<uint32_t>(args.end_mask) & (1 << i_temp)); | |||||
bool shrink_mask_i = (static_cast<uint32_t>(args.shrink_axis_mask) & (1 << i_temp)); | |||||
ret = MaskCal(begin_mask_i, end_mask_i, shrink_mask_i, begin_i, end_i, dim_i); | |||||
if (ret != SUCCESS) { | |||||
GELOGW("MaskCal failed, because of data overflow."); | |||||
return NOT_CHANGED; | |||||
} | |||||
if (stride_i == 0) { | |||||
stride_i = kDefaultSrideSize; | |||||
} else if (stride_i < 0) { | |||||
stride_i = -stride_i; | |||||
begin_i = x_shape.GetDim(i) - begin_i - 1; | |||||
end_i = x_shape.GetDim(i) - end_i - 1; | |||||
} | |||||
if ((begin_i == 0) && (end_i == 0)) { | |||||
dim_final = x_shape.GetDim(i); | |||||
} else { | |||||
dim_final = abs(end_i - begin_i) / stride_i; | |||||
} | |||||
output_dims.push_back(dim_final); | |||||
input_dims.push_back(x_shape.GetDim(i)); | |||||
begin_vec.push_back(begin_i); | |||||
stride_vec.push_back(stride_i); | |||||
} | |||||
// Index 0 can always gets a GeTensorDesc object from any OpDescPtr. | |||||
auto output_tensor_desc = attr->GetOutputDesc(0); | |||||
GeTensorPtr output_ptr = MakeShared<GeTensor>(output_tensor_desc); | |||||
if (output_ptr == nullptr) { | |||||
GELOGW("MakeShared GeTensor failed, node name %s.", attr->GetName().c_str()); | |||||
return NOT_CHANGED; | |||||
} | |||||
void *data = reinterpret_cast<void *>(const_cast<uint8_t *>(weight0->GetData().data())); | |||||
GE_CHECK_NOTNULL(data); | |||||
ret = CheckOutputDims(output_dims, attr); | |||||
if (ret != SUCCESS) { | |||||
return ret; | |||||
} | |||||
ret = OpUtils::SetOutputSliceData(data, static_cast<int64_t>(data_size), args.data_type, input_dims, begin_vec, | |||||
output_dims, output_ptr.get(), stride_vec); | |||||
if (ret != SUCCESS) { | |||||
GELOGW("SetOutputSliceData failed."); | |||||
return NOT_CHANGED; | |||||
} | |||||
GeTensorDesc &t_d = output_ptr->MutableTensorDesc(); | |||||
t_d.SetDataType(static_cast<DataType>(args.data_type)); | |||||
uint32_t final_dim_size = static_cast<uint32_t>(output_dims.size()); | |||||
vector<int64_t> v_dims; | |||||
GetOutputDims(final_dim_size, output_dims, args, v_dims); | |||||
t_d.SetShape(GeShape(v_dims)); | |||||
v_output.push_back(output_ptr); | |||||
GELOGI("StridedSliceKernel success."); | |||||
return SUCCESS; | |||||
} | |||||
REGISTER_KERNEL(STRIDEDSLICE, StridedSliceKernel); | REGISTER_KERNEL(STRIDEDSLICE, StridedSliceKernel); | ||||
} // namespace ge | } // namespace ge |
@@ -17,34 +17,33 @@ | |||||
#ifndef GE_GRAPH_PASSES_FOLDING_KERNEL_STRIDED_SLICE_KERNEL_H_ | #ifndef GE_GRAPH_PASSES_FOLDING_KERNEL_STRIDED_SLICE_KERNEL_H_ | ||||
#define GE_GRAPH_PASSES_FOLDING_KERNEL_STRIDED_SLICE_KERNEL_H_ | #define GE_GRAPH_PASSES_FOLDING_KERNEL_STRIDED_SLICE_KERNEL_H_ | ||||
#include <vector> | |||||
#include "inc/kernel.h" | #include "inc/kernel.h" | ||||
#include <vector> | |||||
namespace ge { | namespace ge { | ||||
struct Attr { | |||||
int64_t begin_mask; | |||||
int64_t end_mask; | |||||
int64_t ellipsis_mask; | |||||
int64_t new_axis_mask; | |||||
int64_t data_type; | |||||
int64_t shrink_axis_mask; | |||||
}; | |||||
class StridedSliceKernel : public Kernel { | class StridedSliceKernel : public Kernel { | ||||
public: | public: | ||||
Status Compute(const OpDescPtr attr, const std::vector<ConstGeTensorPtr> &input, | Status Compute(const OpDescPtr attr, const std::vector<ConstGeTensorPtr> &input, | ||||
vector<GeTensorPtr> &v_output) override; | vector<GeTensorPtr> &v_output) override; | ||||
private: | private: | ||||
Status CheckAndGetAttr(const OpDescPtr &attr, const std::vector<ConstGeTensorPtr> &input, Attr &args); | |||||
Status CheckWeight(const ConstGeTensorPtr &weight0, const ConstGeTensorPtr &weight1, const ConstGeTensorPtr &weight2, | |||||
const ConstGeTensorPtr &weight3) const; | |||||
Status MaskCal(const bool &begin_mask_flag, const bool &end_mask_flag, const bool &shrink_mask_flag, int32_t &begin_i, | |||||
int32_t &end_i, int32_t &dim_i) const; | |||||
void GetOutputDims(uint32_t dims_size, const std::vector<int64_t> &output_dims, const Attr &args, | |||||
vector<int64_t> &v_dims); | |||||
Status CheckOutputDims(const std::vector<int64_t> &output_dims, const OpDescPtr attr); | |||||
Status CheckAndGetAttr(const OpDescPtr &attr); | |||||
Status CheckInputParam(const std::vector<ConstGeTensorPtr> &input) const; | |||||
Status InitParamWithAttrs(const std::vector<ConstGeTensorPtr> &input, std::vector<int64_t> &input_dims, | |||||
std::vector<int64_t> &begin_vec, std::vector<int64_t> &output_dims, | |||||
std::vector<int64_t> &stride_vec); | |||||
Status MaskCal(const size_t i, int64_t &begin_i, int64_t &end_i, int64_t &dim_i) const; | |||||
Status StrideCal(const int64_t x_dims_i, int64_t &begin_i, int64_t &end_i, int64_t &stride_i, | |||||
int64_t &dim_final) const; | |||||
void ExpandDimsWithNewAxis(const ConstGeTensorPtr &begin_tensor, const size_t x_dims_num, vector<int64_t> &x_dims); | |||||
void GetOutputDims(uint32_t dims_size, const std::vector<int64_t> &output_dims, vector<int64_t> &v_dims); | |||||
map<string, uint32_t> attr_value_map_ = {{STRIDE_SLICE_ATTR_BEGIN_MASK, 0}, | |||||
{STRIDE_SLICE_ATTR_END_MASK, 0}, | |||||
{STRIDE_SLICE_ATTR_ELLIPSIS_MASK, 0}, | |||||
{STRIDE_SLICE_ATTR_NEW_AXIS_MASK, 0}, | |||||
{STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK, 0}}; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_GRAPH_PASSES_FOLDING_KERNEL_STRIDED_SLICE_KERNEL_H_ | #endif // GE_GRAPH_PASSES_FOLDING_KERNEL_STRIDED_SLICE_KERNEL_H_ |
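The header change replaces the old monolithic helpers (struct Attr, CheckWeight, the four-flag MaskCal) with finer-grained steps: attribute lookup into attr_value_map_, input validation, and parameter initialisation. The refactored Compute() body is not part of this diff; the sketch below is only an assumed call order showing how the new declarations fit together, and the comments describe the apparent intent of each helper rather than its actual implementation.

Status StridedSliceKernel::Compute(const OpDescPtr attr, const std::vector<ConstGeTensorPtr> &input,
                                   vector<GeTensorPtr> &v_output) {
  // Read the five mask attributes into attr_value_map_.
  if (CheckAndGetAttr(attr) != SUCCESS) {
    return NOT_CHANGED;
  }
  // Validate the x/begin/end/strides weights.
  if (CheckInputParam(input) != SUCCESS) {
    return NOT_CHANGED;
  }
  // Compute per-axis begin/stride/output vectors via MaskCal()/StrideCal().
  std::vector<int64_t> input_dims;
  std::vector<int64_t> begin_vec;
  std::vector<int64_t> output_dims;
  std::vector<int64_t> stride_vec;
  if (InitParamWithAttrs(input, input_dims, begin_vec, output_dims, stride_vec) != SUCCESS) {
    return NOT_CHANGED;
  }
  // ... build the output GeTensor from the computed vectors, as the old
  //     Compute() shown earlier does after its per-axis loop ...
  return SUCCESS;
}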
@@ -27,6 +27,12 @@ const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL"; | |||||
HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream) | HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream) | ||||
: model_(model), device_id_(device_id), stream_(stream) {} | : model_(model), device_id_(device_id), stream_(stream) {} | ||||
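// Destroy the generator context only if it was created during initialisation;
// the rtCtxDestroy() return code is cast to void because a destructor cannot
// propagate a Status to its caller.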
HybridModelExecutor::~HybridModelExecutor() { | |||||
if (context_.rt_gen_context != nullptr) { | |||||
(void)rtCtxDestroy(context_.rt_gen_context); | |||||
} | |||||
} | |||||
Status HybridModelExecutor::Init() { | Status HybridModelExecutor::Init() { | ||||
GELOGD("Start to init HybridGraphEngine."); | GELOGD("Start to init HybridGraphEngine."); | ||||
GE_CHK_STATUS_RET_NOLOG(InitExecutionContext()); | GE_CHK_STATUS_RET_NOLOG(InitExecutionContext()); | ||||