From: @shenwei41 Reviewed-by: @xsmq,@liucunwei Signed-off-by: @liucunweitags/v1.2.0
@@ -1,8 +1,8 @@ | |||||
[submodule "parser"] | [submodule "parser"] | ||||
path = parser | path = parser | ||||
url = https://gitee.com/ascend/parser.git | url = https://gitee.com/ascend/parser.git | ||||
branch = master | |||||
branch = r1.3.0 | |||||
[submodule "metadef"] | [submodule "metadef"] | ||||
path = metadef | path = metadef | ||||
url = https://gitee.com/ascend/metadef.git | url = https://gitee.com/ascend/metadef.git | ||||
branch = master | |||||
branch = r1.3.0 |
@@ -937,6 +937,10 @@ add_library(atc_stub_ge_compiler SHARED | |||||
add_dependencies(atc_stub_ge_compiler ge_stub) | add_dependencies(atc_stub_ge_compiler ge_stub) | ||||
target_compile_options(atc_stub_ge_compiler PRIVATE | |||||
-fno-common | |||||
) | |||||
target_link_libraries(atc_stub_ge_compiler PRIVATE | target_link_libraries(atc_stub_ge_compiler PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
) | ) | ||||
@@ -171,17 +171,17 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) { | |||||
// GE finalize, releasing all resources | // GE finalize, releasing all resources | ||||
Status GEFinalize() { | Status GEFinalize() { | ||||
ErrorManager::GetInstance().SetStage(ErrorMessage::kFinalize, ErrorMessage::kFinalize); | |||||
GELOGT(TRACE_INIT, "GEFinalize start"); | |||||
ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||||
std::lock_guard<std::mutex> lock(g_ge_release_mutex); | |||||
// check init status | // check init status | ||||
if (!g_ge_initialized) { | if (!g_ge_initialized) { | ||||
GELOGW("GEFinalize is called before GEInitialize"); | |||||
GELOGW("[FINAL][FINAL]GEFinalize is called before GEInitialize"); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
std::lock_guard<std::mutex> lock(g_ge_release_mutex); | |||||
ErrorManager::GetInstance().SetStage(ErrorMessage::kFinalize, ErrorMessage::kFinalize); | |||||
ErrorManager::GetInstance().GenWorkStreamIdDefault(); | |||||
GELOGT(TRACE_INIT, "GEFinalize start"); | |||||
// call Finalize | // call Finalize | ||||
Status ret = SUCCESS; | Status ret = SUCCESS; | ||||
Status middle_ret; | Status middle_ret; | ||||
@@ -212,6 +212,7 @@ target_link_libraries(ge_executor PRIVATE | |||||
add_library(ge_executor_shared SHARED ${SRC_LIST} ${PROTO_HDRS}) | add_library(ge_executor_shared SHARED ${SRC_LIST} ${PROTO_HDRS}) | ||||
target_compile_options(ge_executor_shared PRIVATE | target_compile_options(ge_executor_shared PRIVATE | ||||
-fno-common | |||||
-Werror | -Werror | ||||
-O2 | -O2 | ||||
-Wno-deprecated-declarations | -Wno-deprecated-declarations | ||||
@@ -38,6 +38,7 @@ REGISTER_OP_CREATOR(ExpandDims, GeDeletedOp); | |||||
REGISTER_OP_CREATOR(Reshape, GeDeletedOp); | REGISTER_OP_CREATOR(Reshape, GeDeletedOp); | ||||
REGISTER_OP_CREATOR(ReFormat, GeDeletedOp); | REGISTER_OP_CREATOR(ReFormat, GeDeletedOp); | ||||
REGISTER_OP_CREATOR(Squeeze, GeDeletedOp); | REGISTER_OP_CREATOR(Squeeze, GeDeletedOp); | ||||
REGISTER_OP_CREATOR(Unsqueeze, GeDeletedOp); | |||||
REGISTER_OP_CREATOR(Size, GeDeletedOp); | REGISTER_OP_CREATOR(Size, GeDeletedOp); | ||||
REGISTER_OP_CREATOR(Shape, GeDeletedOp); | REGISTER_OP_CREATOR(Shape, GeDeletedOp); | ||||
REGISTER_OP_CREATOR(ShapeN, GeDeletedOp); | REGISTER_OP_CREATOR(ShapeN, GeDeletedOp); | ||||
@@ -16,14 +16,12 @@ | |||||
#include "ge_runtime/task/label_goto_task.h" | #include "ge_runtime/task/label_goto_task.h" | ||||
#include "ge_runtime/task/task_factory.h" | #include "ge_runtime/task/task_factory.h" | ||||
#include "framework/common/util.h" | |||||
namespace ge { | namespace ge { | ||||
namespace model_runner { | namespace model_runner { | ||||
LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::shared_ptr<LabelGotoTaskInfo> &task_info) | LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::shared_ptr<LabelGotoTaskInfo> &task_info) | ||||
: TaskRepeater<LabelGotoTaskInfo>(model_context, task_info), | |||||
task_info_(task_info), | |||||
stream_(nullptr), | |||||
label_(nullptr) { | |||||
: TaskRepeater<LabelGotoTaskInfo>(model_context, task_info), task_info_(task_info) { | |||||
if (task_info_ == nullptr) { | if (task_info_ == nullptr) { | ||||
GELOGW("task_info_ is null!"); | GELOGW("task_info_ is null!"); | ||||
return; | return; | ||||
@@ -42,29 +40,78 @@ LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::share | |||||
label_ = label_list[label_id]; | label_ = label_list[label_id]; | ||||
} | } | ||||
LabelGotoTask::~LabelGotoTask() {} | |||||
LabelGotoTask::~LabelGotoTask() { | |||||
GE_FREE_RT_LOG(label_info_); | |||||
GE_FREE_RT_LOG(index_value_); | |||||
} | |||||
bool LabelGotoTask::Distribute() { | bool LabelGotoTask::Distribute() { | ||||
GELOGI("LabelGotoTask Distribute start."); | GELOGI("LabelGotoTask Distribute start."); | ||||
if (!CheckParamValid()) { | |||||
return false; | |||||
} | |||||
const std::vector<void *> label_list = { label_ }; | |||||
rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), RT_MEMORY_HBM); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||||
return false; | |||||
} | |||||
uint64_t branch_index = 0; | |||||
rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &branch_index, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||||
return false; | |||||
} | |||||
uint32_t label_info_size = sizeof(rtLabelDevInfo) * label_list.size(); | |||||
rt_ret = rtMalloc(&label_info_, label_info_size, RT_MEMORY_HBM); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||||
return false; | |||||
} | |||||
rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info_, label_info_size); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||||
return false; | |||||
} | |||||
rt_ret = rtLabelSwitchByIndex(index_value_, label_list.size(), label_info_, stream_); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||||
return false; | |||||
} | |||||
GELOGI("DistributeTask end."); | |||||
return true; | |||||
} | |||||
bool LabelGotoTask::CheckParamValid() { | |||||
if (stream_ == nullptr) { | if (stream_ == nullptr) { | ||||
GELOGE(PARAM_INVALID, "stream is null!"); | GELOGE(PARAM_INVALID, "stream is null!"); | ||||
return false; | return false; | ||||
} | } | ||||
if (label_ == nullptr) { | if (label_ == nullptr) { | ||||
GELOGE(PARAM_INVALID, "label is null!"); | GELOGE(PARAM_INVALID, "label is null!"); | ||||
return false; | return false; | ||||
} | } | ||||
rtError_t rt_ret = rtLabelGotoEx(label_, stream_); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||||
if (label_info_ != nullptr) { | |||||
GELOGE(PARAM_INVALID, "label_info_ has dirty data."); | |||||
return false; | |||||
} | |||||
if (index_value_ != nullptr) { | |||||
GELOGE(PARAM_INVALID, "index_value_ has dirty data."); | |||||
return false; | return false; | ||||
} | } | ||||
GELOGI("DistributeTask end."); | |||||
return true; | return true; | ||||
} | } | ||||
REGISTER_TASK(TaskInfoType::LABEL_GOTO, LabelGotoTask, LabelGotoTaskInfo); | REGISTER_TASK(TaskInfoType::LABEL_GOTO, LabelGotoTask, LabelGotoTaskInfo); | ||||
} // namespace model_runner | } // namespace model_runner | ||||
} // namespace ge | } // namespace ge |
@@ -31,9 +31,13 @@ class LabelGotoTask : public TaskRepeater<LabelGotoTaskInfo> { | |||||
bool Distribute() override; | bool Distribute() override; | ||||
private: | private: | ||||
bool CheckParamValid(); | |||||
std::shared_ptr<LabelGotoTaskInfo> task_info_; | std::shared_ptr<LabelGotoTaskInfo> task_info_; | ||||
void *stream_; | |||||
void *label_; | |||||
void *stream_{nullptr}; | |||||
void *label_{nullptr}; | |||||
void *label_info_{nullptr}; | |||||
void *index_value_{nullptr}; | |||||
}; | }; | ||||
} // namespace model_runner | } // namespace model_runner | ||||
} // namespace ge | } // namespace ge | ||||
@@ -50,9 +50,13 @@ const char *const kFileNameSuffix = "online"; | |||||
const char *const kAicpuAllshape = "_AllShape"; | const char *const kAicpuAllshape = "_AllShape"; | ||||
constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | ||||
const int64_t kDynamicDimValue = -2; | const int64_t kDynamicDimValue = -2; | ||||
const int kDefaultDeviceId = 0; | |||||
const int kDefaultJobId = 0; | |||||
std::map<ge::OpEngineType, std::string> engine_type_map{ | std::map<ge::OpEngineType, std::string> engine_type_map{ | ||||
{ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}}; | |||||
{ge::ENGINE_SYS, kEngineNameDefault}, | |||||
{ge::ENGINE_AICORE, kAIcoreEngine}, | |||||
{ge::ENGINE_VECTOR, kVectorEngine}}; | |||||
bool ContainsDynamicInpus(const ge::OpDesc &op_desc) { | bool ContainsDynamicInpus(const ge::OpDesc &op_desc) { | ||||
for (auto &tensor_desc : op_desc.GetAllInputsDescPtr()) { | for (auto &tensor_desc : op_desc.GetAllInputsDescPtr()) { | ||||
@@ -83,8 +87,9 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty | |||||
} else { | } else { | ||||
ErrorManager::GetInstance().ATCReportErrMessage("E14001", {"opname", "optype", "value", "reason"}, | ErrorManager::GetInstance().ATCReportErrMessage("E14001", {"opname", "optype", "value", "reason"}, | ||||
{op_desc->GetName(), op_desc->GetType(), "engine type", | {op_desc->GetName(), op_desc->GetType(), "engine type", | ||||
"it only support kEngineNameDefault/kAIcoreEngine/kVectorEngine"}); | |||||
GELOGE(FAILED, "CheckEngineType: engine type: %d not support", static_cast<int>(engine_type)); | |||||
"it only support default/AIcoreEngine/VectorEngine"}); | |||||
GELOGE(FAILED, "[Check][EngineType]value:%d not support, " | |||||
"only support default/AIcoreEngine/VectorEngine now", static_cast<int>(engine_type)); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -188,17 +193,20 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const | |||||
(void)AttrUtils::SetBool(data_op, "_is_single_op", true); | (void)AttrUtils::SetBool(data_op, "_is_single_op", true); | ||||
GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail."); | |||||
GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail."); | |||||
GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, | |||||
"[Add][InputDesc]fail for node:%s", data_op->GetName().c_str()); | |||||
GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, | |||||
"[Add][OutputDesc]fail for node:%s", data_op->GetName().c_str()); | |||||
if (attr) { | if (attr) { | ||||
GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED, "Set index fail."); | |||||
GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED, | |||||
"[Set][Attr:%s]fail for node:%s", ATTR_NAME_INDEX.c_str(), data_op->GetName().c_str()); | |||||
} | } | ||||
ge::NodePtr arg_node = graph->AddNode(data_op); | ge::NodePtr arg_node = graph->AddNode(data_op); | ||||
GE_CHK_BOOL_EXEC(arg_node != nullptr, return FAILED, "Insert Data node fail."); | |||||
GE_CHK_BOOL_EXEC(arg_node != nullptr, return FAILED, "Insert Data node fail"); | |||||
GE_CHK_STATUS(GraphUtils::AddEdge(arg_node->GetOutDataAnchor(0), node->GetInDataAnchor(index)), | GE_CHK_STATUS(GraphUtils::AddEdge(arg_node->GetOutDataAnchor(0), node->GetInDataAnchor(index)), | ||||
"Add edge[%s->%s] fail.", data_op->GetName().c_str(), node->GetName().c_str()); | |||||
"[Add][Edge]fail from node:%s to node:%s", data_op->GetName().c_str(), node->GetName().c_str()); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -213,20 +221,23 @@ static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, cons | |||||
for (const auto &out_desc : outputs) { | for (const auto &out_desc : outputs) { | ||||
GeTensorDesc tensor = out_desc.GetTensorDesc(); | GeTensorDesc tensor = out_desc.GetTensorDesc(); | ||||
TensorUtils::SetInputTensor(tensor, true); | TensorUtils::SetInputTensor(tensor, true); | ||||
GE_CHK_BOOL_EXEC(op_desc->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail"); | |||||
GE_CHK_BOOL_EXEC(op_desc->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, | |||||
"[Add][InputDesc]fail for node:%s", op_desc->GetName().c_str()); | |||||
TensorUtils::SetInputTensor(tensor, false); | TensorUtils::SetInputTensor(tensor, false); | ||||
TensorUtils::SetOutputTensor(tensor, true); | TensorUtils::SetOutputTensor(tensor, true); | ||||
GE_CHK_BOOL_EXEC(op_desc->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail"); | |||||
GE_CHK_BOOL_EXEC(op_desc->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, | |||||
"[Add][OutputDesc]fail for node:%s", op_desc->GetName().c_str()); | |||||
count++; | count++; | ||||
} | } | ||||
GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | ||||
ge::NodePtr out_node = graph->AddNode(op_desc); | ge::NodePtr out_node = graph->AddNode(op_desc); | ||||
GE_CHK_BOOL_EXEC(out_node != nullptr, return FAILED, "Insert Output node fail."); | |||||
GE_CHK_BOOL_EXEC(out_node != nullptr, return FAILED, | |||||
"[Add][Node:%s]fail in graph:%u", op_desc->GetName().c_str(), graph->GetGraphID()); | |||||
GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | ||||
for (int32_t i = 0; i < count; ++i) { | for (int32_t i = 0; i < count; ++i) { | ||||
GE_CHK_STATUS(GraphUtils::AddEdge(node->GetOutDataAnchor(i), out_node->GetInDataAnchor(i)), | GE_CHK_STATUS(GraphUtils::AddEdge(node->GetOutDataAnchor(i), out_node->GetInDataAnchor(i)), | ||||
"Add edge[%s->%s] fail.", node->GetName().c_str(), out_node->GetName().c_str()); | |||||
"[Add][Edge]fail from node:%s to node:%s", node->GetName().c_str(), out_node->GetName().c_str()); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -710,7 +721,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
auto node = comp_graph->FindNode(op_desc->GetName()); | auto node = comp_graph->FindNode(op_desc->GetName()); | ||||
Status ret = CheckEngineTypeSupport(node, engine_type); | Status ret = CheckEngineTypeSupport(node, engine_type); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "check engine type failed."); | |||||
GELOGE(ret, "[Check][EngineType]value:%d for node:%s not support", engine_type, node->GetName().c_str()); | |||||
return ret; | return ret; | ||||
} | } | ||||
} | } | ||||
@@ -915,6 +926,13 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> | |||||
static std::atomic<uint64_t> atomic_session_id(0); | static std::atomic<uint64_t> atomic_session_id(0); | ||||
auto session_id = atomic_session_id.fetch_add(1); | auto session_id = atomic_session_id.fetch_add(1); | ||||
// This is a temporary add for graph with variable | |||||
auto version = static_cast<int32_t>(SessionVersion::ClOUD_VERSION); | |||||
ret = VarManager::Instance(session_id)->Init(version, session_id, kDefaultDeviceId, kDefaultJobId); | |||||
GELOGI("Start init var instance, session_id %lu", session_id); | |||||
if (ret != SUCCESS) { | |||||
GELOGW("Failed init var instance, session_id %lu", session_id); | |||||
} | |||||
if (is_singleop_unregistered_) { | if (is_singleop_unregistered_) { | ||||
ret = graph_manager_.BuildGraphForUnregisteredOp(graph_id, inputs, ge_root_model, session_id); | ret = graph_manager_.BuildGraphForUnregisteredOp(graph_id, inputs, ge_root_model, session_id); | ||||
} else { | } else { | ||||
@@ -400,6 +400,10 @@ static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchor | |||||
} | } | ||||
static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) { | static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) { | ||||
if (graph->GetGraphUnknownFlag()) { | |||||
GELOGI("Graph %s is unknown graph, ignore gen_task for constant.", graph->GetName().c_str()); | |||||
return SUCCESS; | |||||
} | |||||
for (auto &node : graph->GetDirectNode()) { | for (auto &node : graph->GetDirectNode()) { | ||||
// CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT | // CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT | ||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
@@ -33,13 +33,21 @@ using std::queue; | |||||
namespace ge { | namespace ge { | ||||
LogicalStreamPass::LogicalStreamPass(const string &name) : name_(name) {} | LogicalStreamPass::LogicalStreamPass(const string &name) : name_(name) {} | ||||
const string &LogicalStreamPass::GetName() const { return name_; } | |||||
const string &LogicalStreamPass::GetName() const { | |||||
return name_; | |||||
} | |||||
bool LogicalStreamPass::IsEngineSkip(const Subgraph &subgraph) const { return subgraph.engine_conf.skip_assign_stream; } | |||||
bool LogicalStreamPass::IsEngineSkip(const Subgraph &subgraph) const { | |||||
return subgraph.engine_conf.skip_assign_stream; | |||||
} | |||||
bool LogicalStreamPass::IsEngineAttach(const Subgraph &subgraph) const { return subgraph.engine_conf.attach; } | |||||
bool LogicalStreamPass::IsEngineAttach(const Subgraph &subgraph) const { | |||||
return subgraph.engine_conf.attach; | |||||
} | |||||
bool LogicalStreamPass::IsEngineIndependent(const Subgraph &subgraph) const { return subgraph.engine_conf.independent; } | |||||
bool LogicalStreamPass::IsEngineIndependent(const Subgraph &subgraph) const { | |||||
return subgraph.engine_conf.independent; | |||||
} | |||||
bool LogicalStreamPass::HasStreamLabel(const Subgraph &subgraph) const { | bool LogicalStreamPass::HasStreamLabel(const Subgraph &subgraph) const { | ||||
return !subgraph.subgraph_info.GetStreamLabel().empty(); | return !subgraph.subgraph_info.GetStreamLabel().empty(); | ||||
@@ -60,14 +68,14 @@ Status AssignByLabelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> & | |||||
// Subgraphs of the same stream_label are assigned to the same stream, | // Subgraphs of the same stream_label are assigned to the same stream, | ||||
// and different stream_labels are assigned new streams. | // and different stream_labels are assigned new streams. | ||||
auto iter = label_streams.find(stream_label); | auto iter = label_streams.find(stream_label); | ||||
if (iter != label_streams.end()) { | |||||
subgraph->stream_id = iter->second; | |||||
} else { | |||||
if (iter == label_streams.end()) { | |||||
subgraph->stream_id = next_stream; | subgraph->stream_id = next_stream; | ||||
GELOGI("Assign new stream %ld for label %s.", next_stream, stream_label.c_str()); | GELOGI("Assign new stream %ld for label %s.", next_stream, stream_label.c_str()); | ||||
label_streams.emplace(stream_label, next_stream); | label_streams.emplace(stream_label, next_stream); | ||||
++next_stream; | |||||
next_stream++; | |||||
} else { | |||||
subgraph->stream_id = iter->second; | |||||
} | } | ||||
changed = true; | changed = true; | ||||
} | } | ||||
@@ -92,15 +100,15 @@ Status IndependentStreamPass::Run(ComputeGraphPtr graph, const vector<SubgraphPt | |||||
const string &stream_label = subgraph->subgraph_info.GetStreamLabel(); | const string &stream_label = subgraph->subgraph_info.GetStreamLabel(); | ||||
auto &label_streams = engine_streams[engine]; | auto &label_streams = engine_streams[engine]; | ||||
auto iter = label_streams.find(stream_label); | auto iter = label_streams.find(stream_label); | ||||
if (iter != label_streams.end()) { | |||||
subgraph->stream_id = iter->second; | |||||
} else { | |||||
if (iter == label_streams.end()) { | |||||
subgraph->stream_id = next_stream; | subgraph->stream_id = next_stream; | ||||
GELOGI("Assign new independent stream %ld for engine %s (label: %s).", next_stream, engine.c_str(), | GELOGI("Assign new independent stream %ld for engine %s (label: %s).", next_stream, engine.c_str(), | ||||
stream_label.c_str()); | stream_label.c_str()); | ||||
label_streams.emplace(stream_label, next_stream); | label_streams.emplace(stream_label, next_stream); | ||||
++next_stream; | |||||
next_stream++; | |||||
} else { | |||||
subgraph->stream_id = iter->second; | |||||
} | } | ||||
changed = true; | changed = true; | ||||
} | } | ||||
@@ -121,7 +129,9 @@ Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphP | |||||
} | } | ||||
SubgraphPtr reusable_subgraph = GetReusableSubgraph(subgraph, end_subgraph_map, pld_subgraph_map); | SubgraphPtr reusable_subgraph = GetReusableSubgraph(subgraph, end_subgraph_map, pld_subgraph_map); | ||||
if (reusable_subgraph != nullptr) { | |||||
if (reusable_subgraph == nullptr) { | |||||
(void)AssignNewStream(subgraph); | |||||
} else { | |||||
if (HasAssignedStream(*reusable_subgraph)) { | if (HasAssignedStream(*reusable_subgraph)) { | ||||
subgraph->stream_id = reusable_subgraph->stream_id; | subgraph->stream_id = reusable_subgraph->stream_id; | ||||
} else { | } else { | ||||
@@ -140,8 +150,6 @@ Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphP | |||||
GELOGI("Subgraph %s of engine %s reuses stream of subgraph %s of engine %s.", subgraph->name.c_str(), | GELOGI("Subgraph %s of engine %s reuses stream of subgraph %s of engine %s.", subgraph->name.c_str(), | ||||
subgraph->engine_conf.id.c_str(), reusable_subgraph->name.c_str(), | subgraph->engine_conf.id.c_str(), reusable_subgraph->name.c_str(), | ||||
reusable_subgraph->engine_conf.id.c_str()); | reusable_subgraph->engine_conf.id.c_str()); | ||||
} else { | |||||
(void)AssignNewStream(subgraph); | |||||
} | } | ||||
changed = true; | changed = true; | ||||
} | } | ||||
@@ -191,13 +199,15 @@ bool AssignByDependencyPass::CouldReuse(const SubgraphPtr &subgraph, const Subgr | |||||
auto iter = pld_subgraph_map.find(end_pld_pair.second); | auto iter = pld_subgraph_map.find(end_pld_pair.second); | ||||
if (iter != pld_subgraph_map.end()) { | if (iter != pld_subgraph_map.end()) { | ||||
const SubgraphPtr &pred_subgraph_succ = iter->second; | const SubgraphPtr &pred_subgraph_succ = iter->second; | ||||
if (pred_subgraph_succ != subgraph && pred_subgraph_succ->engine_conf.id == pred_subgraph->engine_conf.id) { | |||||
if ((pred_subgraph_succ != subgraph) && | |||||
(pred_subgraph_succ->engine_conf.id == pred_subgraph->engine_conf.id)) { | |||||
return false; | return false; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
if ((subgraph->engine_conf.id == pred_subgraph->engine_conf.id) || IsEngineAttach(*subgraph)) { | |||||
if ((subgraph->engine_conf.id == pred_subgraph->engine_conf.id) || | |||||
IsEngineAttach(*subgraph)) { | |||||
return true; | return true; | ||||
} | } | ||||
@@ -406,7 +416,7 @@ Status UpdateForSkippedEnginePass::Run(ComputeGraphPtr graph, const vector<Subgr | |||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
auto stream_id = op_desc->GetStreamId(); | auto stream_id = op_desc->GetStreamId(); | ||||
if (stream_id != kInvalidStream && !HasStreamLabel(*subgraph)) { | |||||
if ((stream_id != kInvalidStream) && !HasStreamLabel(*subgraph)) { | |||||
ops_without_label.emplace(op_desc); | ops_without_label.emplace(op_desc); | ||||
} | } | ||||
} | } | ||||
@@ -463,7 +473,7 @@ Status AllReduceParallelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPt | |||||
for (const NodePtr &node : graph->GetDirectNode()) { | for (const NodePtr &node : graph->GetDirectNode()) { | ||||
if (!IsHcomNode(node->GetType()) || | if (!IsHcomNode(node->GetType()) || | ||||
node->GetInDataNodes().size() <= 1) { | |||||
(node->GetInDataNodes().size() <= 1)) { | |||||
continue; | continue; | ||||
} | } | ||||
@@ -575,7 +585,7 @@ Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Grap | |||||
GE_CHECK_NOTNULL(graph); | GE_CHECK_NOTNULL(graph); | ||||
NodePtr parent_node = graph->GetParentNode(); | NodePtr parent_node = graph->GetParentNode(); | ||||
if (parent_node == nullptr || parent_node->GetOpDesc() == nullptr) { | |||||
if ((parent_node == nullptr) || (parent_node->GetOpDesc() == nullptr)) { | |||||
context_.default_stream = kInvalidStream; | context_.default_stream = kInvalidStream; | ||||
} else { | } else { | ||||
context_.default_stream = parent_node->GetOpDesc()->GetStreamId(); | context_.default_stream = parent_node->GetOpDesc()->GetStreamId(); | ||||
@@ -597,7 +607,7 @@ Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Grap | |||||
return status; | return status; | ||||
} | } | ||||
GELOGD("Subgraphs of graph %s:", graph->GetName().c_str()); | |||||
GELOGD("Subgraphs of graph %s", graph->GetName().c_str()); | |||||
for (const auto &subgraph : subgraphs) { | for (const auto &subgraph : subgraphs) { | ||||
if (subgraph != nullptr) { | if (subgraph != nullptr) { | ||||
GELOGD("subgraph: %s", subgraph->name.c_str()); | GELOGD("subgraph: %s", subgraph->name.c_str()); | ||||
@@ -686,7 +696,7 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra | |||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
if (op_desc != nullptr) { | if (op_desc != nullptr) { | ||||
int64_t stream_id = op_desc->GetStreamId(); | int64_t stream_id = op_desc->GetStreamId(); | ||||
if (stream_id != kInvalidStream && stream_id < stream_num) { | |||||
if ((stream_id != kInvalidStream) && (stream_id < stream_num)) { | |||||
stream_has_node[stream_id] = true; | stream_has_node[stream_id] = true; | ||||
} | } | ||||
} | } | ||||
@@ -695,10 +705,10 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra | |||||
context_.next_stream = 0; | context_.next_stream = 0; | ||||
vector<int64_t> old_to_new_streams(stream_num, kInvalidStream); | vector<int64_t> old_to_new_streams(stream_num, kInvalidStream); | ||||
for (size_t old_stream = 0; old_stream < stream_has_node.size(); ++old_stream) { | |||||
for (size_t old_stream = 0; old_stream < stream_has_node.size(); old_stream++) { | |||||
if (stream_has_node[old_stream]) { | if (stream_has_node[old_stream]) { | ||||
old_to_new_streams[old_stream] = context_.next_stream; | old_to_new_streams[old_stream] = context_.next_stream; | ||||
++context_.next_stream; | |||||
context_.next_stream++; | |||||
} | } | ||||
} | } | ||||
@@ -706,7 +716,7 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra | |||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
if (op_desc != nullptr) { | if (op_desc != nullptr) { | ||||
int64_t stream_id = op_desc->GetStreamId(); | int64_t stream_id = op_desc->GetStreamId(); | ||||
if (stream_id != kInvalidStream && stream_id < stream_num) { | |||||
if ((stream_id != kInvalidStream) && (stream_id < stream_num)) { | |||||
op_desc->SetStreamId(old_to_new_streams[stream_id]); | op_desc->SetStreamId(old_to_new_streams[stream_id]); | ||||
} | } | ||||
} | } | ||||
@@ -70,7 +70,10 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) { | if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) { | ||||
GELOGE(FAILED, "Memory size:%ld is invalid.", all_memory_size.front()); | |||||
GELOGE(FAILED, "[Check][MemRangeStep]first mem_range_step:%ld less than 0,invalid," | |||||
"maybe has dynamic shape in graph", all_memory_size.front()); | |||||
REPORT_INNER_ERROR("E19999", "first mem_range_step:%ld less than 0,invalid," | |||||
"maybe has dynamic shape in graph", all_memory_size.front()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
// Memory size is 512 aligned, so it is not necessary to take less than 512 | // Memory size is 512 aligned, so it is not necessary to take less than 512 | ||||
@@ -81,12 +84,18 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) { | |||||
GELOGD("Range number: %zu", range_number); | GELOGD("Range number: %zu", range_number); | ||||
vector<vector<int64_t>> ranges(range_number); | vector<vector<int64_t>> ranges(range_number); | ||||
GE_CHK_BOOL_EXEC((range_number != 0), return PARAM_INVALID, "range_number can't be 0."); | |||||
GE_CHK_BOOL_EXEC((range_number != 0), | |||||
REPORT_INNER_ERROR("E19999", "inner data[range_number] is 0, judge invalid"); | |||||
return PARAM_INVALID, | |||||
"[Check][RangeNumber]inner data is 0, judge invalid."); | |||||
size_t range_number_limit = all_memory_size.size() / range_number; | size_t range_number_limit = all_memory_size.size() / range_number; | ||||
int64_t range_ceil = min_memory_size; | int64_t range_ceil = min_memory_size; | ||||
for (size_t i = 1; i <= range_number; i++) { | for (size_t i = 1; i <= range_number; i++) { | ||||
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(static_cast<uint64_t>(range_ceil), kRangeCeilInterval), | GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(static_cast<uint64_t>(range_ceil), kRangeCeilInterval), | ||||
GELOGE(FAILED, "Multiply result is out of range."); | |||||
GELOGE(FAILED, "[Check][MemRangeCeil]Multiply result is out of range," | |||||
"range_ceil:%ld, interval:%u", range_ceil, kRangeCeilInterval); | |||||
REPORT_INNER_ERROR("E19999", "process mem_range_ceil,multiply result out of range," | |||||
"range_ceil:%ld, interval:%u", range_ceil, kRangeCeilInterval); | |||||
return FAILED); | return FAILED); | ||||
range_ceil *= kRangeCeilInterval; // The block size of each interval is doubled every time. | range_ceil *= kRangeCeilInterval; // The block size of each interval is doubled every time. | ||||
for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) { | for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) { | ||||
@@ -30,6 +30,7 @@ | |||||
#include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
#include "graph/utils/op_desc_utils.h" | #include "graph/utils/op_desc_utils.h" | ||||
#include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
#include "graph/utils/type_utils.h" | |||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
@@ -457,7 +458,16 @@ Status GetNoAlignSize(const ge::OpDesc &desc, uint32_t index, size_t &size) { | |||||
DataType data_type = output_op_desc->GetDataType(); | DataType data_type = output_op_desc->GetDataType(); | ||||
graphStatus graph_status = TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size); | graphStatus graph_status = TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size); | ||||
if (graph_status != GRAPH_SUCCESS) { | if (graph_status != GRAPH_SUCCESS) { | ||||
GELOGE(graph_status, "CalcTensorMemSize failed!"); | |||||
GELOGE(graph_status, "[Calculate][TensorSize]shape:%s, format:%s, data_type:%s, op:%s, out_index:%u", | |||||
shape.ToString().c_str(), | |||||
TypeUtils::FormatToSerialString(format).c_str(), | |||||
TypeUtils::DataTypeToSerialString(data_type).c_str(), | |||||
desc.GetName().c_str(), index); | |||||
REPORT_CALL_ERROR("E19999", "CalcTensorMemSize fail, shape:%s, format:%s, data_type:%s, op:%s, out_index:%u", | |||||
shape.ToString().c_str(), | |||||
TypeUtils::FormatToSerialString(format).c_str(), | |||||
TypeUtils::DataTypeToSerialString(data_type).c_str(), | |||||
desc.GetName().c_str(), index); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
size = static_cast<size_t>(tensor_size); | size = static_cast<size_t>(tensor_size); | ||||
@@ -586,9 +596,12 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | |||||
GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); | GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); | ||||
int64_t size = 0; | int64_t size = 0; | ||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); | GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); | ||||
GE_IF_BOOL_EXEC(size < 0, GELOGE(FAILED, "Node:%s size:%ld is invalid, maybe it is unknown shape node.", | |||||
node_op_desc->GetName().c_str(), size); | |||||
return;); | |||||
GE_IF_BOOL_EXEC(size < 0, | |||||
GELOGE(FAILED, "[Check][TensorSize]tensor_size:%ld is invalid, maybe it is unknown shape node, Node_name:%s", | |||||
size, node_op_desc->GetName().c_str()); | |||||
REPORT_INNER_ERROR("E19999", "tensor_size:%ld is invalid, maybe it is unknown shape node, Node_name:%s", | |||||
size, node_op_desc->GetName().c_str()); | |||||
return;); | |||||
batch_all_memory_size[batch_label].emplace_back(size); | batch_all_memory_size[batch_label].emplace_back(size); | ||||
if (batch_total_size.find(batch_label) == batch_total_size.end()) { | if (batch_total_size.find(batch_label) == batch_total_size.end()) { | ||||
batch_total_size[batch_label] = size; | batch_total_size[batch_label] = size; | ||||
@@ -678,22 +691,34 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou | |||||
if (static_cast<size_t>(out_index) < n->GetAllOutDataAnchors().size()) { | if (static_cast<size_t>(out_index) < n->GetAllOutDataAnchors().size()) { | ||||
auto out_anchor = n->GetOutDataAnchor(out_index); | auto out_anchor = n->GetOutDataAnchor(out_index); | ||||
GE_IF_BOOL_EXEC(out_anchor == nullptr, | GE_IF_BOOL_EXEC(out_anchor == nullptr, | ||||
GELOGE(FAILED, "Node[%s] output[%u] anchor is null.", n->GetName().c_str(), out_index); | |||||
GELOGE(FAILED, "[Check][Anchor]Node[%s] output[%u] anchor is null.", | |||||
n->GetName().c_str(), out_index); | |||||
REPORT_INNER_ERROR("E19999", "output anchor is null, node_name: %s output_index: %u.", | |||||
n->GetName().c_str(), out_index); | |||||
return false;); | return false;); | ||||
for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { | for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { | ||||
GE_IF_BOOL_EXEC(peer_in_anchor == nullptr, | GE_IF_BOOL_EXEC(peer_in_anchor == nullptr, | ||||
GELOGE(FAILED, "Node[%s] output[%u] peer_in_anchor 0 is null.", n->GetName().c_str(), out_index); | |||||
GELOGE(FAILED, "[Check][Anchor]Node[%s] output[%u] peer_in_anchor 0 is null.", | |||||
n->GetName().c_str(), out_index); | |||||
REPORT_INNER_ERROR("E19999", "output anchor peer is null, node_name: %s output_index: %u.", | |||||
n->GetName().c_str(), out_index); | |||||
return false;); | return false;); | ||||
auto peer_node = peer_in_anchor->GetOwnerNode(); | auto peer_node = peer_in_anchor->GetOwnerNode(); | ||||
GE_IF_BOOL_EXEC(peer_node == nullptr, | GE_IF_BOOL_EXEC(peer_node == nullptr, | ||||
GELOGE(FAILED, "Node[%s] output[%u] node is null.", n->GetName().c_str(), out_index); | |||||
GELOGE(FAILED, "[Check][Node]Node[%s] output[%u] peer node is null.", | |||||
n->GetName().c_str(), out_index); | |||||
REPORT_INNER_ERROR("E19999", "output anchor peer node is null, node_name: %s output_index: %u.", | |||||
n->GetName().c_str(), out_index); | |||||
return false;); | return false;); | ||||
// Get the continuous input type of the node, default is false | // Get the continuous input type of the node, default is false | ||||
bool is_input_continuous = false; | bool is_input_continuous = false; | ||||
auto peer_in_node_desc = peer_node->GetOpDesc(); | auto peer_in_node_desc = peer_node->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr, | GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr, | ||||
GELOGE(FAILED, "Node[%s] output[%u] nodedesc is null.", n->GetName().c_str(), out_index); | |||||
GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] nodedesc is null.", | |||||
n->GetName().c_str(), out_index); | |||||
REPORT_INNER_ERROR("E19999", "output anchor peer op_desc is null, node_name:%s output_index:%u.", | |||||
n->GetName().c_str(), out_index); | |||||
return false;); | return false;); | ||||
// If GetBool fail, is_input_continuous is false. | // If GetBool fail, is_input_continuous is false. | ||||
@@ -793,7 +818,10 @@ bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr & | |||||
if ((in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr) || | if ((in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr) || | ||||
(in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) || | (in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) || | ||||
(in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) { | (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) { | ||||
GELOGE(FAILED, "Node[%s] output[%u] peer input node desc is null.", n->GetName().c_str(), out_index); | |||||
GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] peer input node desc is null.", | |||||
n->GetName().c_str(), out_index); | |||||
REPORT_INNER_ERROR("E19999", "get output anchor peer op_desc fail, node_name: %s output_index: %u.", | |||||
n->GetName().c_str(), out_index); | |||||
return false; | return false; | ||||
} | } | ||||
auto peer_out_node_desc = in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc(); | auto peer_out_node_desc = in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc(); | ||||
@@ -1077,7 +1105,9 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
OpMemoryType mem_type, const NodePtr &n, uint32_t out_index, | OpMemoryType mem_type, const NodePtr &n, uint32_t out_index, | ||||
const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem, | const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem, | ||||
const bool continuous, int64_t memory_type) { | const bool continuous, int64_t memory_type) { | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "Input parameter n is null."); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, | |||||
REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed"); | |||||
return nullptr, "[Check][Param]Input parameter n(type:node_ptr) is null."); | |||||
auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(node_op_desc == nullptr, return nullptr); | GE_IF_BOOL_EXEC(node_op_desc == nullptr, return nullptr); | ||||
std::string batch_label; | std::string batch_label; | ||||
@@ -1129,7 +1159,10 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
} | } | ||||
auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory, memory_type); | auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory, memory_type); | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "new an object failed."); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, | |||||
REPORT_INNER_ERROR("E19999", "new a memoryblock object failed. node_name:%s out_index:%u", | |||||
n->GetName().c_str(), out_index); | |||||
return nullptr, "[New][Object]new MemoryBlock failed, node_name:%s out_index:%u", n->GetName().c_str(), out_index); | |||||
// Data and netoutput need zero copy block | // Data and netoutput need zero copy block | ||||
block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | ||||
@@ -1188,9 +1221,13 @@ void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutpu | |||||
Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, | Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, | ||||
const bool is_op_reuse_mem) { | const bool is_op_reuse_mem) { | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return INTERNAL_ERROR, "input node is null."); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, | |||||
REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null"); | |||||
return INTERNAL_ERROR, "[check][param]Input parameter n(type:NodePtr) is null."); | |||||
auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return INTERNAL_ERROR, "node_op_desc is null."); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, | |||||
REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null"); | |||||
return INTERNAL_ERROR, "[Check][Param]Input parameter n(type:OpDescPtr) is null"); | |||||
// continuous output support ref only when all output ref input | // continuous output support ref only when all output ref input | ||||
bool isAllOutputRef = true; | bool isAllOutputRef = true; | ||||
@@ -1204,7 +1241,9 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
} | } | ||||
if (!isAllOutputRef && isOutputHasRef) { | if (!isAllOutputRef && isOutputHasRef) { | ||||
GELOGE(INTERNAL_ERROR, "continuous output node ref part input, not support this situation, node_name:%s", | |||||
REPORT_INNER_ERROR("E19999", "continuous output node ref part input, not support now. node_name:%s", | |||||
n->GetName().c_str()); | |||||
GELOGE(INTERNAL_ERROR, "[Check][OutRefStatus]continuous output node ref part input, not support, node_name:%s", | |||||
n->GetName().c_str()); | n->GetName().c_str()); | ||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
@@ -1215,7 +1254,9 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { | for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { | ||||
auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | ||||
if (output_op_desc == nullptr) { | if (output_op_desc == nullptr) { | ||||
GELOGE(INTERNAL_ERROR, "Get output desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u", | |||||
n->GetName().c_str(), index); | |||||
GELOGE(INTERNAL_ERROR, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
@@ -1226,7 +1267,9 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
int64_t size = 0; | int64_t size = 0; | ||||
if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { | if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { | ||||
GELOGE(INTERNAL_ERROR, "Get size failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
REPORT_CALL_ERROR("E19999", "get tensor_size failed, node_name:%s, output_index:%u", | |||||
n->GetName().c_str(), index); | |||||
GELOGE(INTERNAL_ERROR, "[Get][TensorSize]node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
size_t align_size = static_cast<size_t>(size); | size_t align_size = static_cast<size_t>(size); | ||||
@@ -1266,7 +1309,9 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
block->last_continuous_block_ = true; | block->last_continuous_block_ = true; | ||||
++(block->ref_count_); | ++(block->ref_count_); | ||||
} else { | } else { | ||||
GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str()); | |||||
REPORT_CALL_ERROR("E19999", "apply continuousMemory failed, node_name:%s, total_size:%ld", | |||||
n->GetName().c_str(), total_size); | |||||
GELOGE(INTERNAL_ERROR, "[Apply][ContinuousMemory]node_name:%s, total_size:%ld", n->GetName().c_str(), total_size); | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -1274,25 +1319,37 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges, | MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges, | ||||
const bool is_op_reuse_mem, const bool continuous) { | const bool is_op_reuse_mem, const bool continuous) { | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, | |||||
REPORT_INNER_ERROR("E19999", "Input parameter n(type:NodePtr) is null"); | |||||
return nullptr, "[Check][Param]Input parameter n(type:NodePtr) is null"); | |||||
auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, | |||||
REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null"); | |||||
return nullptr, "[Check][Param]Input parameter n(type:OpDescPtr) is null"); | |||||
MemoryBlock *block = nullptr; | MemoryBlock *block = nullptr; | ||||
NodeIndexIO node_index_io(n, index, kOut); | NodeIndexIO node_index_io(n, index, kOut); | ||||
int64_t size = 0; | int64_t size = 0; | ||||
auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | ||||
GE_IF_BOOL_EXEC(output_op_desc == nullptr, return nullptr); | |||||
GE_IF_BOOL_EXEC(output_op_desc == nullptr, | |||||
REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
GELOGE(FAILED, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
return nullptr); | |||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | ||||
size_t no_align_size = 0; | size_t no_align_size = 0; | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, | ||||
return nullptr, "Get no align size failed"); | |||||
REPORT_CALL_ERROR("E19999", "Get no align size failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
return nullptr, "[Get][TensorSize]Get no align size, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
std::string symbol; | std::string symbol; | ||||
bool reuse_input = false; | bool reuse_input = false; | ||||
if (IsSymbolExist(node_index_io, symbol)) { | if (IsSymbolExist(node_index_io, symbol)) { | ||||
block = symbol_blocks_[symbol]; | block = symbol_blocks_[symbol]; | ||||
GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str()); | |||||
return nullptr); | |||||
GE_IF_BOOL_EXEC(block == nullptr, | |||||
REPORT_INNER_ERROR("E19999", "get ref block failed, node_name:%s, symbol:%s", | |||||
node_op_desc->GetName().c_str(), node_index_io.ToString().c_str()); | |||||
GELOGE(FAILED, "[Get][RefBlock]node_name:%s, symbol:%s", | |||||
node_op_desc->GetName().c_str(), node_index_io.ToString().c_str()); | |||||
return nullptr); | |||||
// reduce old size | // reduce old size | ||||
size_t align_size = block->Size(); | size_t align_size = block->Size(); | ||||
AlignMemOffset(align_size); | AlignMemOffset(align_size); | ||||
@@ -1335,12 +1392,24 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
vector<bool> workspace_reuse_flag; | vector<bool> workspace_reuse_flag; | ||||
block = ApplyMemory(block_size, size, no_align_size, kOutput, n, index, | block = ApplyMemory(block_size, size, no_align_size, kOutput, n, index, | ||||
workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type); | workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type); | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, | |||||
REPORT_CALL_ERROR("E19999", "apply out Memory failed, node_name:%s, block_size:%ld, out_index:%u", | |||||
n->GetName().c_str(), block_size, index); | |||||
return nullptr, "[Apply][Memory]node_name:%s, block_size:%ld, out_index:%u", | |||||
n->GetName().c_str(), block_size, index); | |||||
} | } | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr."); | |||||
int out_count = 0; | int out_count = 0; | ||||
GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), GELOGE(FAILED, "index is out of range."); return nullptr); | |||||
GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), | |||||
REPORT_INNER_ERROR("E19999", "out index:%u exceed out_size:%lu, node_name:%s", | |||||
index, n->GetAllOutDataAnchors().size(), n->GetName().c_str()); | |||||
GELOGE(FAILED, "[Check][OutIndex]index:%u exceed out_size:%lu, node_name:%s", | |||||
index, n->GetAllOutDataAnchors().size(), n->GetName().c_str()); | |||||
return nullptr); | |||||
auto out_data_anchor = n->GetOutDataAnchor(index); | auto out_data_anchor = n->GetOutDataAnchor(index); | ||||
GE_IF_BOOL_EXEC(out_data_anchor == nullptr, GELOGE(FAILED, "Out data anchor is nullptr."); return nullptr); | |||||
GE_IF_BOOL_EXEC(out_data_anchor == nullptr, | |||||
REPORT_INNER_ERROR("E19999", "out anchor is null, index:%u, node_name:%s", index, n->GetName().c_str()); | |||||
GELOGE(FAILED, "[Check][OutAnchor]is null, index:%u, node_name:%s", index, n->GetName().c_str()); | |||||
return nullptr); | |||||
for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) { | for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) { | ||||
auto owner_node = in_anchor->GetOwnerNode(); | auto owner_node = in_anchor->GetOwnerNode(); | ||||
auto op_desc = owner_node->GetOpDesc(); | auto op_desc = owner_node->GetOpDesc(); | ||||
@@ -1546,8 +1615,13 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
GELOGD("Assign memory node[%s], output size[%zu], output memory type size[%zu]", op_desc->GetName().c_str(), | GELOGD("Assign memory node[%s], output size[%zu], output memory type size[%zu]", op_desc->GetName().c_str(), | ||||
op_desc->GetOutputsSize(), memorys_type.size()); | op_desc->GetOutputsSize(), memorys_type.size()); | ||||
if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) { | if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) { | ||||
GELOGE(INTERNAL_ERROR, "fusion: node[%s], output memory size err[outputsize:%zu, memorysize:%zu]", | |||||
op_desc->GetName().c_str(), op_desc->GetOutputsSize(), memorys_type.size()); | |||||
REPORT_INNER_ERROR("E19999", "Attr[%s] size:%zu not equal to node output size:%zu, node_name:%s", | |||||
ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(), | |||||
op_desc->GetOutputsSize(), op_desc->GetName().c_str()); | |||||
GELOGE(INTERNAL_ERROR, | |||||
"[Check][MemTypeAttr]Attr %s size:%zu not equal to node output size:%zu, node_name:%s", | |||||
ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(), | |||||
op_desc->GetOutputsSize(), op_desc->GetName().c_str()); | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
@@ -1673,8 +1747,10 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||||
temp.size(), tvm_workspace_memory_type.size()); | temp.size(), tvm_workspace_memory_type.size()); | ||||
if (has_tvm_workspace_mem_type_attr && (temp.size() != tvm_workspace_memory_type.size())) { | if (has_tvm_workspace_mem_type_attr && (temp.size() != tvm_workspace_memory_type.size())) { | ||||
GELOGE(INTERNAL_ERROR, "fusion: node[%s], tvm workspace memory size error![v_temp:%zu, workspace:%zu]", | |||||
n->GetName().c_str(), temp.size(), tvm_workspace_memory_type.size()); | |||||
REPORT_INNER_ERROR("E19999", "Attr[%s]size:%zu is not equal to workspace size:%zu, node_name:%s", | |||||
TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(), temp.size(), n->GetName().c_str()); | |||||
GELOGE(INTERNAL_ERROR, "[Check][Attr]Attr %s size:%zu is not equal to workspace size:%zu, node_name:%s", | |||||
TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(), temp.size(), n->GetName().c_str()); | |||||
return; | return; | ||||
} | } | ||||
for (size_t i = 0; i < temp.size(); i++) { | for (size_t i = 0; i < temp.size(); i++) { | ||||
@@ -2083,8 +2159,11 @@ bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, | |||||
bool has_workspace_mem_type_attr = | bool has_workspace_mem_type_attr = | ||||
ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_memory_type); | ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_memory_type); | ||||
if (has_workspace_mem_type_attr && (workspace_memory_type.size() <= index)) { | if (has_workspace_mem_type_attr && (workspace_memory_type.size() <= index)) { | ||||
GELOGE(INTERNAL_ERROR, "node[%s], workspace_memory size error![index:%zu, workspace:%zu]", | |||||
node->GetName().c_str(), index, workspace_memory_type.size()); | |||||
REPORT_INNER_ERROR("E19999", "get workspace mem_type failed, " | |||||
"index %zu invalid, bigger than attr %s size:%zu, node_name:%s", | |||||
index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str()); | |||||
GELOGE(INTERNAL_ERROR, "[Get][WorkspaceMemType]index %zu invalid, bigger than attr %s size:%zu, node_name:%s", | |||||
index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str()); | |||||
return false; | return false; | ||||
} | } | ||||
memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM; | memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM; | ||||
@@ -99,7 +99,8 @@ Status VariableMemoryAssigner::AssignMemory2HasRefAttrNode() { | |||||
Status GraphMemoryAssigner::AssignMemory() { | Status GraphMemoryAssigner::AssignMemory() { | ||||
ge::HybridMemAssignerPtr mem_assigner(new(std::nothrow) HybridMemAssigner(compute_graph_)); | ge::HybridMemAssignerPtr mem_assigner(new(std::nothrow) HybridMemAssigner(compute_graph_)); | ||||
if (mem_assigner->Assign() != ge::SUCCESS) { | if (mem_assigner->Assign() != ge::SUCCESS) { | ||||
GELOGE(ge::FAILED, "Memory assigner failed"); | |||||
GELOGE(ge::FAILED, "[Assign][GraphMem]graph_id:%u, graph_name:%s", | |||||
compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
return ge::FAILED; | return ge::FAILED; | ||||
} | } | ||||
MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset()); | MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset()); | ||||
@@ -115,7 +116,10 @@ Status GraphMemoryAssigner::AssignMemory() { | |||||
auto variable_assigner = | auto variable_assigner = | ||||
std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); | std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); | ||||
if (variable_assigner == nullptr) { | if (variable_assigner == nullptr) { | ||||
GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed."); | |||||
GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s", | |||||
compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
REPORT_INNER_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory, " | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
return ge::FAILED; | return ge::FAILED; | ||||
} | } | ||||
@@ -134,7 +138,10 @@ ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() { | |||||
auto variable_assigner = | auto variable_assigner = | ||||
std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); | std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); | ||||
if (variable_assigner == nullptr) { | if (variable_assigner == nullptr) { | ||||
GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed."); | |||||
GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s", | |||||
compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
REPORT_INNER_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory, " | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
return ge::FAILED; | return ge::FAILED; | ||||
} | } | ||||
if (variable_assigner->AssignVarAttr2Nodes() != ge::SUCCESS) { | if (variable_assigner->AssignVarAttr2Nodes() != ge::SUCCESS) { | ||||
@@ -147,8 +154,10 @@ ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() { | |||||
auto variable_assigner = | auto variable_assigner = | ||||
std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); | std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); | ||||
if (variable_assigner == nullptr) { | if (variable_assigner == nullptr) { | ||||
GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed."); | |||||
return ge::FAILED; | |||||
GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s", | |||||
compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
REPORT_INNER_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory, " | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
} | } | ||||
if (variable_assigner->AssignMemory2HasRefAttrNode() != ge::SUCCESS) { | if (variable_assigner->AssignMemory2HasRefAttrNode() != ge::SUCCESS) { | ||||
return ge::FAILED; | return ge::FAILED; | ||||
@@ -161,17 +170,18 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out | |||||
int64_t &batch_dim_num, int64_t &out_size) { | int64_t &batch_dim_num, int64_t &out_size) { | ||||
graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size); | graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size); | ||||
if (graph_status != GRAPH_SUCCESS) { | if (graph_status != GRAPH_SUCCESS) { | ||||
GELOGE(FAILED, "Opdesc GetSize failed!"); | |||||
GELOGE(FAILED, "[Get][TensorSize]"); | |||||
REPORT_INNER_ERROR("E19999", "New Object:VariableMemoryAssigner failed when assign graph memory"); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
GeShape output_shape = output_desc->GetShape(); | GeShape output_shape = output_desc->GetShape(); | ||||
std::vector<int64_t> output_dims = output_shape.GetDims(); | std::vector<int64_t> output_dims = output_shape.GetDims(); | ||||
if (dim_index >= static_cast<int64_t>(output_dims.size())) { | if (dim_index >= static_cast<int64_t>(output_dims.size())) { | ||||
std::string error = "Invaild value" + FmtToStr(dim_index) + | |||||
" of attr _reuse_input_on_dim_index, which is out of data range [0," | |||||
+ std::to_string(output_dims.size()) + ")"; | |||||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||||
REPORT_INNER_ERROR("E19999", "Inner param dim_index value:%ld invalid, bigger than dim size:%lu in shape:%s", | |||||
dim_index, output_dims.size(), output_shape.ToString().c_str()); | |||||
GELOGE(FAILED, "[Check][Param:dim_index]value:%ld invalid, bigger than dim size:%lu in shape:%s", | |||||
dim_index, output_dims.size(), output_shape.ToString().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -187,14 +197,23 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out | |||||
graph_status = ge::TensorUtils::CalcTensorMemSize(output_shape, out_format, data_type, output_mem_size); | graph_status = ge::TensorUtils::CalcTensorMemSize(output_shape, out_format, data_type, output_mem_size); | ||||
if (graph_status != GRAPH_SUCCESS) { | if (graph_status != GRAPH_SUCCESS) { | ||||
GELOGE(graph_status, "Opdesc CalcTensorMemSize failed!"); | |||||
GELOGE(graph_status, "[Calc][TensorSize]"); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
if (output_mem_size < 0) { | if (output_mem_size < 0) { | ||||
std::string error = "After calculating tensor memory size, output_mem_size" + FmtToStr(output_mem_size) + | |||||
" is out of data range [0," + std::to_string(INT64_MAX) + "]"; | |||||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||||
REPORT_INNER_ERROR("E19999", "After calculating, tensor memory size:%ld invalid, less than 0. " | |||||
"shape:%s, format:%s, dtype:%s, maybe has dynamic shape", | |||||
output_mem_size, | |||||
output_shape.ToString().c_str(), | |||||
TypeUtils::FormatToSerialString(out_format).c_str(), | |||||
TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||||
GELOGE(FAILED, "[Check][TensorSize]value:%ld invalid after calc, less than 0. shape:%s, format:%s, dtype:%s, " | |||||
"maybe has dynamic shape", | |||||
output_mem_size, | |||||
output_shape.ToString().c_str(), | |||||
TypeUtils::FormatToSerialString(out_format).c_str(), | |||||
TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -203,7 +222,10 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out | |||||
Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset) { | Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset) { | ||||
if (memory_offset_.empty()) { | if (memory_offset_.empty()) { | ||||
GELOGE(FAILED, "memory_offset_ is empty."); | |||||
REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected when ReAssignMemory, " | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
GELOGE(FAILED, "[Check][InnerData:memory_offset_]empty is not expected, " | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
return ge::FAILED; | return ge::FAILED; | ||||
} | } | ||||
@@ -218,8 +240,10 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size | |||||
auto session_id = compute_graph_->GetSessionID(); | auto session_id = compute_graph_->GetSessionID(); | ||||
if (total_mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) { | if (total_mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) { | ||||
GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", total_mem_offset, | |||||
VarManager::Instance(session_id)->GetGraphMemoryMaxSize()); | |||||
GELOGE(ge::FAILED, "[Check][TotalMemOffset] %zu is greater than memory manager malloc max size %zu, " | |||||
"graph_id:%u, graph_name:%s, reduce your batchsize or scale your model may solve problem", | |||||
total_mem_offset, VarManager::Instance(session_id)->GetGraphMemoryMaxSize(), | |||||
compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
for (auto iter : mem_type_to_offset) { | for (auto iter : mem_type_to_offset) { | ||||
ErrorManager::GetInstance().ATCReportErrMessage("E19022", {"memType", "size", "item", "maxsize"}, | ErrorManager::GetInstance().ATCReportErrMessage("E19022", {"memType", "size", "item", "maxsize"}, | ||||
{std::to_string(iter.first), std::to_string(iter.second), "featuremap", | {std::to_string(iter.first), std::to_string(iter.second), "featuremap", | ||||
@@ -234,7 +258,13 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size | |||||
Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) { | Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) { | ||||
BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger()); | BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger()); | ||||
GE_IF_BOOL_EXEC(priority_assigner == nullptr, GELOGE(FAILED, "Get priority_assigner failed."); return ge::FAILED;); | |||||
if (priority_assigner == nullptr) { | |||||
REPORT_INNER_ERROR("E19999", "InnerData priority_assigner nullptr, not expected when AssignZeroCopyMemory, " | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
GELOGE(FAILED, "[Check][InnerData:priority_assigner]nullptr is invalid, " | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
return ge::FAILED; | |||||
} | |||||
size_t mem_offset_tmp = mem_offset[RT_MEMORY_HBM]; | size_t mem_offset_tmp = mem_offset[RT_MEMORY_HBM]; | ||||
@@ -254,8 +284,11 @@ Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offse | |||||
zero_mem_copy_size = mem_offset[RT_MEMORY_HBM] - mem_offset_tmp; | zero_mem_copy_size = mem_offset[RT_MEMORY_HBM] - mem_offset_tmp; | ||||
auto iter = memory_offset_.find(RT_MEMORY_HBM); | auto iter = memory_offset_.find(RT_MEMORY_HBM); | ||||
if (iter == memory_offset_.end()) { | if (iter == memory_offset_.end()) { | ||||
std::string error = "Memory offset does not have memory type[HBM]"; | |||||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||||
REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], " | |||||
"not expected when AssignZeroCopyMemory, " | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]" | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM]; | iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM]; | ||||
@@ -304,7 +337,7 @@ uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) { | |||||
} | } | ||||
if (continuous_type != 0) { | if (continuous_type != 0) { | ||||
GELOGI("Current node %s continuous type %d.", op_desc->GetName().c_str(), continuous_type); | |||||
GELOGI("Current node %s continuous type %d", op_desc->GetName().c_str(), continuous_type); | |||||
} | } | ||||
return continuous_type; | return continuous_type; | ||||
} | } | ||||
@@ -312,8 +345,9 @@ uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) { | |||||
Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &output_desc, uint32_t continuous_type, | Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &output_desc, uint32_t continuous_type, | ||||
int64_t &tensor_size, int64_t &nopadding_size) { | int64_t &tensor_size, int64_t &nopadding_size) { | ||||
if ((op_desc == nullptr) || (output_desc == nullptr)) { | if ((op_desc == nullptr) || (output_desc == nullptr)) { | ||||
GELOGE(FAILED, "Input para is nullptr."); | |||||
return FAILED; | |||||
REPORT_INNER_ERROR("E19999", "InnerData param op_desc or output_desc is nullptr, " | |||||
"not expected when GetMemorySize"); | |||||
GELOGE(FAILED, "[Check][Param]op_desc or output_desc is nullptr"); | |||||
} | } | ||||
tensor_size = 0; | tensor_size = 0; | ||||
nopadding_size = 0; | nopadding_size = 0; | ||||
@@ -322,7 +356,10 @@ Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &o | |||||
int64_t attr_dim_index; | int64_t attr_dim_index; | ||||
bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); | bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); | ||||
if (!get_attr_dim_flag) { | if (!get_attr_dim_flag) { | ||||
GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); | |||||
REPORT_INNER_ERROR("E19999", "Get Attr:%s failed when GetMemorySize, op_name:%s", | |||||
ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX.c_str(), op_desc->GetName().c_str()); | |||||
GELOGE(FAILED, "[Get][Attr:%s]fail for op_name:%s", | |||||
ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX.c_str(), op_desc->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -330,17 +367,25 @@ Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &o | |||||
int64_t batch_dim_num = 1; | int64_t batch_dim_num = 1; | ||||
if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, nopadding_size, batch_dim_num, tensor_size) != | if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, nopadding_size, batch_dim_num, tensor_size) != | ||||
SUCCESS) { | SUCCESS) { | ||||
GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s.", op_desc->GetName().c_str()); | |||||
REPORT_CALL_ERROR("E19999", "CalculateTensorRealSizeAndOutSize failed, attr_dim_index:%ld, op_name:%s", | |||||
attr_dim_index, op_desc->GetName().c_str()); | |||||
GELOGE(FAILED, "[Calculate][NopaddingSize]failed for node %s, attr_dim_index:%ld", | |||||
op_desc->GetName().c_str(), attr_dim_index); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
} else { | } else { | ||||
if (ge::TensorUtils::GetSize(*output_desc, tensor_size) != ge::SUCCESS) { | if (ge::TensorUtils::GetSize(*output_desc, tensor_size) != ge::SUCCESS) { | ||||
GELOGE(FAILED, "GetSize failed."); | |||||
REPORT_INNER_ERROR("E19999", "Get Tensor Size failed, op_name:%s", op_desc->GetName().c_str()); | |||||
GELOGE(FAILED, "[Get][TensorSize]failed in padding case, op_name:%s", op_desc->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
} | } | ||||
if ((tensor_size < 0) || (nopadding_size < 0)) { | if ((tensor_size < 0) || (nopadding_size < 0)) { | ||||
GELOGE(FAILED, "GetMemorySize for node %s failed.", op_desc->GetName().c_str()); | |||||
REPORT_INNER_ERROR("E19999", "GetMemorySize fail, " | |||||
"tensor_size:%ld or nopadding_size:%ld less than 0, invalid, op_name:%s", | |||||
tensor_size, nopadding_size, op_desc->GetName().c_str()); | |||||
GELOGE(FAILED, "[Get][MemorySize]tensor_size:%ld or nopadding_size:%ld less than 0, invalid, op_name:%s", | |||||
tensor_size, nopadding_size, op_desc->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -374,7 +419,7 @@ bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op | |||||
// If GetBool fail, is_peer_reference is false. | // If GetBool fail, is_peer_reference is false. | ||||
(void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); | (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); | ||||
GE_IF_BOOL_EXEC(is_peer_reference, | GE_IF_BOOL_EXEC(is_peer_reference, | ||||
std::string warning = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + | |||||
std::string warning = "[Check][Continuous]Current op" + FmtToStr(node->GetOpDesc()->GetName()) + | |||||
" requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + | " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + | ||||
" is ref. There may be conflict between the two."; | " is ref. There may be conflict between the two."; | ||||
GELOGW("%s", warning.c_str()); | GELOGW("%s", warning.c_str()); | ||||
@@ -404,7 +449,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||||
if (continuous_input) { | if (continuous_input) { | ||||
if (AssignContinuousInputMemoryWithAtomicProcessDirectly(node, node_2_continuous_type)) { | if (AssignContinuousInputMemoryWithAtomicProcessDirectly(node, node_2_continuous_type)) { | ||||
GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, continuous_type), | GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, continuous_type), | ||||
"Assign node %s continuous input memory failed.", node->GetName().c_str()) | |||||
"[Assign][Memory:Continuous:Input]fail for node:%s", node->GetName().c_str()) | |||||
} else { | } else { | ||||
nodes_stack.push_back(node); | nodes_stack.push_back(node); | ||||
} | } | ||||
@@ -413,10 +458,11 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||||
int64_t memory_type = RT_MEMORY_HBM; | int64_t memory_type = RT_MEMORY_HBM; | ||||
bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0); | bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0); | ||||
if (continuous_output) { | if (continuous_output) { | ||||
GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"), "Get node memory type failed."); | |||||
GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"), | |||||
"[Get][MemType]fail for node:%s", node->GetName().c_str()); | |||||
ret = AssignContinuousOutputMemory(node, memory_type, continuous_type); | ret = AssignContinuousOutputMemory(node, memory_type, continuous_type); | ||||
if (ret != ge::SUCCESS) { | if (ret != ge::SUCCESS) { | ||||
GELOGE(ret, "Assign continuous output memory failed!"); | |||||
GELOGE(ret, "[Assign][Memory:Continuous:Ouput]fail for node:%s", node->GetName().c_str()); | |||||
return ret; | return ret; | ||||
} | } | ||||
} | } | ||||
@@ -427,14 +473,16 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||||
nodes_stack.pop_back(); | nodes_stack.pop_back(); | ||||
auto iter = node_2_continuous_type.find(node); | auto iter = node_2_continuous_type.find(node); | ||||
if (iter == node_2_continuous_type.end()) { | if (iter == node_2_continuous_type.end()) { | ||||
GELOGE(FAILED, "node %s has no continuous type!", node->GetName().c_str()); | |||||
REPORT_INNER_ERROR("E19999", "Inner data error when process continuous memory alloc for node:%s, " | |||||
"but has no continuous type", node->GetName().c_str()); | |||||
GELOGE(FAILED, "[Get][ContinuousType] find fail for node:%s", node->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second, true), | GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second, true), | ||||
"Assign node %s continuous input memory failed.", node->GetName().c_str()) | |||||
"[Assign][Memory:Continuous:Input]fail for node:%s.", node->GetName().c_str()) | |||||
} | } | ||||
for (auto pair : memory_offset_) { | for (auto pair : memory_offset_) { | ||||
GELOGD("After reassign continuous memory, memory type = %ld, mem_offset = %zu.", pair.first, | |||||
GELOGD("After reassign continuous memory, memory type = %ld, mem offset = %zu.", pair.first, | |||||
pair.second.mem_offset_); | pair.second.mem_offset_); | ||||
} | } | ||||
return ge::SUCCESS; | return ge::SUCCESS; | ||||
@@ -442,11 +490,13 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||||
Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | ||||
int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh) { | int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh) { | ||||
GELOGI("Current node %s needs continuous input.", node->GetName().c_str()); | |||||
GELOGI("Current node %s needs continuous input", node->GetName().c_str()); | |||||
auto iter = memory_offset_.find(memory_type); | auto iter = memory_offset_.find(memory_type); | ||||
if (iter == memory_offset_.end()) { | if (iter == memory_offset_.end()) { | ||||
std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type); | |||||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||||
REPORT_INNER_ERROR("E19999", "find memory offset fail for mem_type:%ld, " | |||||
"when assign continuous input memory for node:%s, ", memory_type, node->GetName().c_str()); | |||||
GELOGE(FAILED, "[Find][MemOffset]fail for mem_type:%ld, when AssignContinuousInputMemory for node:%s", | |||||
memory_type, node->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
// The head and tail of hcom continuous input should be added 512 | // The head and tail of hcom continuous input should be added 512 | ||||
@@ -459,8 +509,9 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
vector<int64_t> output_list_this = op_desc->GetOutputOffset(); | vector<int64_t> output_list_this = op_desc->GetOutputOffset(); | ||||
if (output_list_this.empty()) { | if (output_list_this.empty()) { | ||||
std::string error = "node:" + FmtToStr(op_desc->GetName()) + "has no output offset"; | |||||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||||
REPORT_INNER_ERROR("E19999", "No output offset in node :%s, not expected when assign continuous input memory", | |||||
node->GetName().c_str()); | |||||
GELOGE(FAILED, "[Get][OutputOffset] empty is invalid, node:%s", node->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
(void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated); | (void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated); | ||||
@@ -480,8 +531,9 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||||
lx_fusion = lx_fusion && !offsets_of_fusion.empty(); | lx_fusion = lx_fusion && !offsets_of_fusion.empty(); | ||||
if (lx_fusion) { | if (lx_fusion) { | ||||
if (peer_out_data_anchor->GetIdx() >= static_cast<int>(offsets_of_fusion.size())) { | if (peer_out_data_anchor->GetIdx() >= static_cast<int>(offsets_of_fusion.size())) { | ||||
std::string error = "fusion: peer node" + FmtToStr(peer_op_desc->GetName()) + | |||||
" index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; | |||||
std::string error = "fusion: peer node:" + FmtToStr(peer_op_desc->GetName()) + | |||||
" anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) + | |||||
" is out of range:" + FmtToStr(offsets_of_fusion.size()); | |||||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -497,7 +549,9 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||||
bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || lx_fusion; | bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || lx_fusion; | ||||
vector<int64_t> output_list = peer_op_desc->GetOutputOffset(); | vector<int64_t> output_list = peer_op_desc->GetOutputOffset(); | ||||
if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_list.size())) { | if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_list.size())) { | ||||
std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; | |||||
std::string error = "peer node:" + FmtToStr(peer_op_desc->GetName()) + | |||||
" anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) + | |||||
" is out of range:" + FmtToStr(output_list.size()); | |||||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -506,13 +560,13 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||||
bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0); | bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0); | ||||
if (is_allocated_first_input) { | if (is_allocated_first_input) { | ||||
std::map<int32_t, int32_t> out2ins; | std::map<int32_t, int32_t> out2ins; | ||||
GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "Node: %s get all ref failed", node->GetName().c_str()); | |||||
GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "[Get][AllRef]fail for node: %s", node->GetName().c_str()); | |||||
// output is beginning offset, set offset for input; only support this case now | // output is beginning offset, set offset for input; only support this case now | ||||
if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) { | if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) { | ||||
auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx()); | auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx()); | ||||
output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first); | output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first); | ||||
peer_op_desc->SetOutputOffset(output_list); | peer_op_desc->SetOutputOffset(output_list); | ||||
GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld.", node->GetName().c_str(), | |||||
GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld", node->GetName().c_str(), | |||||
out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), | out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), | ||||
output_list_this.at(out2ins.begin()->first), peer_output_offset); | output_list_this.at(out2ins.begin()->first), peer_output_offset); | ||||
} else { | } else { | ||||
@@ -542,7 +596,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||||
} | } | ||||
GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] " | GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] " | ||||
"size[%zu] realsize[%ld] nopadding size[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), | |||||
"size[%zu] realsize[%ld] nopadding size[%d]", node->GetOwnerComputeGraph()->GetName().c_str(), | |||||
peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(), | peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(), | ||||
output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type, | output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type, | ||||
is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding); | is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding); | ||||
@@ -563,17 +617,32 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||||
Status GetFirstInputPeerOutOutputOffset(const ge::NodePtr &node, int64_t &mem_offset) { | Status GetFirstInputPeerOutOutputOffset(const ge::NodePtr &node, int64_t &mem_offset) { | ||||
auto in_data_anchor_list = node->GetAllInDataAnchors(); | auto in_data_anchor_list = node->GetAllInDataAnchors(); | ||||
if (in_data_anchor_list.empty()) { | if (in_data_anchor_list.empty()) { | ||||
GELOGE(FAILED, "Node %s's in data anchor is empty.", node->GetName().c_str()); | |||||
REPORT_INNER_ERROR("E19999", "InAnchor list empty in node:%s, not expect when GetFirstInputPeerOutOutputOffset", | |||||
node->GetName().c_str()); | |||||
GELOGE(FAILED, "[Get][InAnchor]empty is invalid, node:%s", node->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor(); | auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor(); | ||||
GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, GELOGE(ge::FAILED, "peer_out_data_anchor is null."); | |||||
GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, | |||||
REPORT_INNER_ERROR("E19999", "PeerAcnhor is null, " | |||||
"not expect when GetFirstInputPeerOutOutputOffset for node:%s", | |||||
node->GetName().c_str()); | |||||
GELOGE(ge::FAILED, "[Check][PeerAnchor]null is invalid, node:%s", node->GetName().c_str()); | |||||
return ge::FAILED); | return ge::FAILED); | ||||
auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); | auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(peer_op_desc == nullptr, GELOGE(ge::FAILED, "peer_op_desc is null."); return ge::FAILED); | |||||
GE_IF_BOOL_EXEC(peer_op_desc == nullptr, | |||||
REPORT_INNER_ERROR("E19999", "PeerOpDesc is null, " | |||||
"not expect when GetFirstInputPeerOutOutputOffset for node:%s", | |||||
node->GetName().c_str()); | |||||
GELOGE(ge::FAILED, "[Check][PeerOpDesc]null is invalid, node:%s", node->GetName().c_str()); | |||||
return ge::FAILED); | |||||
vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset(); | vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset(); | ||||
if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) { | if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) { | ||||
GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx()); | |||||
REPORT_INNER_ERROR("E19999", "PeerAnchorIndex:%d bigger than in_offset size:%lu, " | |||||
"judge invalid when GetFirstInputPeerOutOutputOffset for node:%s", | |||||
peer_out_data_anchor->GetIdx(), in_node_output_offsets.size(), node->GetName().c_str()); | |||||
GELOGE(FAILED, "[Check][Index:PeerOutDataAnchor]PeerIndex:%d bigger than in_offset size:%lu, node:%s", | |||||
peer_out_data_anchor->GetIdx(), in_node_output_offsets.size(), node->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
mem_offset = in_node_output_offsets.at(peer_out_data_anchor->GetIdx()); | mem_offset = in_node_output_offsets.at(peer_out_data_anchor->GetIdx()); | ||||
@@ -584,11 +653,18 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node | |||||
uint32_t continuous_type) { | uint32_t continuous_type) { | ||||
GELOGI("Current node %s needs continuous output.", node->GetName().c_str()); | GELOGI("Current node %s needs continuous output.", node->GetName().c_str()); | ||||
auto out_op_desc = node->GetOpDesc(); | auto out_op_desc = node->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); | |||||
GE_IF_BOOL_EXEC(out_op_desc == nullptr, | |||||
REPORT_INNER_ERROR("E19999", "OpDesc is null, " | |||||
"not expect when AssignContinuousOutputMemory for node:%s", | |||||
node->GetName().c_str()); | |||||
GELOGE(ge::FAILED, "[Check][OpDesc]null is invalid, node:%s", node->GetName().c_str())); | |||||
vector<int64_t> output_list = out_op_desc->GetOutputOffset(); | vector<int64_t> output_list = out_op_desc->GetOutputOffset(); | ||||
if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { | if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { | ||||
GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", | |||||
out_op_desc->GetOutputsSize(), output_list.size()); | |||||
REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, invalid in node:%s, " | |||||
"when AssignContinuousOutputMemory", | |||||
out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str()); | |||||
GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s", | |||||
out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str()); | |||||
return ge::FAILED; | return ge::FAILED; | ||||
} | } | ||||
@@ -647,14 +723,18 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||||
map<string, vector<NodePtr>> connecting_output_atomic_nodes; | map<string, vector<NodePtr>> connecting_output_atomic_nodes; | ||||
Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes); | Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes); | ||||
if (status != SUCCESS) { | if (status != SUCCESS) { | ||||
GELOGE(status, "Failed to filter atomic nodes for memory assignment."); | |||||
GELOGE(status, "[Filter][AtomicNode]failed in graph_id:%u, graph_name:%s", | |||||
compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
return status; | return status; | ||||
} | } | ||||
auto mem_iter = memory_offset_.find(RT_MEMORY_HBM); | auto mem_iter = memory_offset_.find(RT_MEMORY_HBM); | ||||
if (mem_iter == memory_offset_.end()) { | if (mem_iter == memory_offset_.end()) { | ||||
std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); | |||||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||||
REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], " | |||||
"not expected when ReAssignAtomicMemory, " | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]" | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -670,7 +750,7 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||||
vector<int64_t> mem_offset_end; | vector<int64_t> mem_offset_end; | ||||
status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end); | status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end); | ||||
if (status != SUCCESS) { | if (status != SUCCESS) { | ||||
GELOGE(status, "Assign atomic output and workspace memory failed, node name is %s.", | |||||
GELOGE(status, "[Assign][Memory]output atomic mem and workspace mem, fail for node name is %s.", | |||||
atomic_node->GetName().c_str()); | atomic_node->GetName().c_str()); | ||||
return status; | return status; | ||||
} | } | ||||
@@ -679,7 +759,7 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||||
int64_t atomic_mem_size = static_cast<int64_t>(mem_iter->second.mem_offset_) - atomic_mem_start; | int64_t atomic_mem_size = static_cast<int64_t>(mem_iter->second.mem_offset_) - atomic_mem_start; | ||||
if (atomic_mem_size != 0) { | if (atomic_mem_size != 0) { | ||||
GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM), | GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM), | ||||
"Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); | |||||
"[Set][Attr]fail for atomic addr clean node %s.", iter.first->GetName().c_str()); | |||||
} | } | ||||
} | } | ||||
batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_)); | batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_)); | ||||
@@ -690,7 +770,8 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||||
for (auto &iter_batch : connecting_output_atomic_nodes) { | for (auto &iter_batch : connecting_output_atomic_nodes) { | ||||
mem_iter->second.mem_offset_ = batch_atomic_mem_start; | mem_iter->second.mem_offset_ = batch_atomic_mem_start; | ||||
if (AssignConnectNetOutputAtomicMemory(iter_batch.second) != SUCCESS) { | if (AssignConnectNetOutputAtomicMemory(iter_batch.second) != SUCCESS) { | ||||
GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput."); | |||||
GELOGE(FAILED, "[Assign][Memory]for nodes that connect to netoutput failed." | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_)); | batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_)); | ||||
@@ -721,9 +802,10 @@ Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign( | |||||
// If GetBool fail, is_reference is false. | // If GetBool fail, is_reference is false. | ||||
(void) ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference); | (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference); | ||||
if (is_reference) { | if (is_reference) { | ||||
std::string error = "Op" + FmtToStr(peer_in_node_desc->GetName()) + | |||||
" cannot have both atomic and is_reference attribute."; | |||||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||||
REPORT_INNER_ERROR("E19999", "Op:%s cannot have both atomic and is_reference attribute, " | |||||
"not support now", peer_in_node_desc->GetName().c_str()); | |||||
GELOGE(FAILED, "[Check][Attr]Op:%s cannot have both atomic and is_reference attribute, " | |||||
"not support now", peer_in_node_desc->GetName().c_str()); | |||||
return ge::PARAM_INVALID; | return ge::PARAM_INVALID; | ||||
} | } | ||||
@@ -761,7 +843,7 @@ Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodeP | |||||
// Assign atomic node output memory | // Assign atomic node output memory | ||||
Status ret = AssignAtomicOutputMemory(node, mem_offset_end); | Status ret = AssignAtomicOutputMemory(node, mem_offset_end); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Failed to assign atomic output memory, node is %s.", node_op_desc->GetName().c_str()); | |||||
GELOGE(ret, "[Assign][Memory:Ouput:Atomic]Failed for node:%s.", node_op_desc->GetName().c_str()); | |||||
return ret; | return ret; | ||||
} | } | ||||
@@ -781,7 +863,7 @@ Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodeP | |||||
ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end); | ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end); | ||||
} | } | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str()); | |||||
GELOGE(ret, "[Assign][Memory:Atomic:Workspace]fail for node:%s.", node_op_desc->GetName().c_str()); | |||||
return ret; | return ret; | ||||
} | } | ||||
} else { | } else { | ||||
@@ -794,8 +876,11 @@ Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodeP | |||||
Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) { | Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) { | ||||
auto iter = memory_offset_.find(RT_MEMORY_HBM); | auto iter = memory_offset_.find(RT_MEMORY_HBM); | ||||
if (iter == memory_offset_.end()) { | if (iter == memory_offset_.end()) { | ||||
std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); | |||||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||||
REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], " | |||||
"not expected when AssignConnectNetOutputAtomicMemory, " | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]" | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
for (auto &node : connect_netoutput_nodes) { | for (auto &node : connect_netoutput_nodes) { | ||||
@@ -811,13 +896,14 @@ Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> & | |||||
node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start); | node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start); | ||||
vector<int64_t> mem_offset_end; | vector<int64_t> mem_offset_end; | ||||
if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) { | if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) { | ||||
GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str()); | |||||
GELOGE(FAILED, "[Assign][Memory]output atomic mem and workspace mem, fail for node name is %s.", | |||||
node->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
// All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately. | // All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately. | ||||
if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end, RT_MEMORY_HBM) != SUCCESS) { | if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end, RT_MEMORY_HBM) != SUCCESS) { | ||||
GELOGE(FAILED, "Failed to set atomic attr separately."); | |||||
GELOGE(FAILED, "[Set][Attr:IndependentAtomic]fail for node:%s", node->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
} | } | ||||
@@ -842,8 +928,11 @@ Status GraphMemoryAssigner::AssignReferenceMemory() { | |||||
vector<int64_t> output_list = out_op_desc->GetOutputOffset(); | vector<int64_t> output_list = out_op_desc->GetOutputOffset(); | ||||
if (out_op_desc->GetOutputsSize() > output_list.size()) { | if (out_op_desc->GetOutputsSize() > output_list.size()) { | ||||
GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", | |||||
out_op_desc->GetOutputsSize(), output_list.size()); | |||||
REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, judge invalid in node:%s " | |||||
"when AssignReferenceMemory", | |||||
out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str()); | |||||
GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s", | |||||
out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str()); | |||||
return ge::FAILED; | return ge::FAILED; | ||||
} | } | ||||
@@ -896,9 +985,12 @@ bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) { | |||||
} | } | ||||
if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) || | if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) || | ||||
(peer_op_desc->GetType() == VARIABLE)) { | (peer_op_desc->GetType() == VARIABLE)) { | ||||
std::string error = "Op" + FmtToStr(node->GetName()) + "'s peer out node" + | |||||
FmtToStr(peer_op_desc->GetName()) + " is invalid, Constant/AippData/Variable is not supported"; | |||||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||||
REPORT_INNER_ERROR("E19999", "node(type:%s, name:%s) link to atomic node(name:%s), " | |||||
"this situation not supported now", | |||||
peer_op_desc->GetType().c_str(), peer_op_desc->GetName().c_str(), node->GetName().c_str()); | |||||
GELOGE(ge::FAILED, "[Check][Link]node(type:%s, name:%s) link to atomic node(name:%s), " | |||||
"this situation not supported now", | |||||
peer_op_desc->GetType().c_str(), peer_op_desc->GetName().c_str(), node->GetName().c_str()); | |||||
return false; | return false; | ||||
} | } | ||||
} | } | ||||
@@ -918,22 +1010,27 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve | |||||
// Check atomic output | // Check atomic output | ||||
vector<int64_t> output_list = op_desc->GetOutputOffset(); | vector<int64_t> output_list = op_desc->GetOutputOffset(); | ||||
if (atomic_output_index.size() > output_list.size()) { | if (atomic_output_index.size() > output_list.size()) { | ||||
std::string error = "Op" + FmtToStr(node->GetName()) + | |||||
"'s size of atomic_output_index is more than the size of output_list"; | |||||
std::string error = | |||||
"Op:" + FmtToStr(node->GetName()) + "'s size:" + FmtToStr(atomic_output_index.size()) + | |||||
" of atomic_output_index is more than the size:" + FmtToStr(output_list.size()) + " of output_list"; | |||||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | ||||
return ge::FAILED; | return ge::FAILED; | ||||
} | } | ||||
auto output_list_size = static_cast<int64_t>(output_list.size()); | auto output_list_size = static_cast<int64_t>(output_list.size()); | ||||
auto iter = memory_offset_.find(RT_MEMORY_HBM); | auto iter = memory_offset_.find(RT_MEMORY_HBM); | ||||
if (iter == memory_offset_.end()) { | if (iter == memory_offset_.end()) { | ||||
std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); | |||||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||||
REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], " | |||||
"not expected when AssignAtomicOutputMemory, " | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]" | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
for (auto &output_index : atomic_output_index) { | for (auto &output_index : atomic_output_index) { | ||||
if (output_index >= output_list_size) { | if (output_index >= output_list_size) { | ||||
std::string error = "Op" + FmtToStr(node->GetName()) + "'s output index" + FmtToStr(output_index) + | |||||
" is more than the size" + FmtToStr(output_list_size) + " of output_list."; | |||||
std::string error = | |||||
"Op:" + FmtToStr(node->GetName()) + "'s atomic_output index:" + FmtToStr(output_index) + | |||||
" is more than the size:" + FmtToStr(output_list_size) + " of output_list."; | |||||
GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str()); | GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str()); | ||||
return ge::PARAM_INVALID; | return ge::PARAM_INVALID; | ||||
} | } | ||||
@@ -941,7 +1038,8 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve | |||||
// If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here | // If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here | ||||
bool is_assigned_mem = false; | bool is_assigned_mem = false; | ||||
if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) { | if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) { | ||||
GELOGE(ge::FAILED, "Failed to get memory assignment of node %s.", node->GetName().c_str()); | |||||
GELOGE(ge::FAILED, "[Get][MemoryAssignmentStatus]fail for node %s, out_index:%ld", | |||||
node->GetName().c_str(), output_index); | |||||
return ge::FAILED; | return ge::FAILED; | ||||
} | } | ||||
@@ -981,8 +1079,9 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve | |||||
Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index, | Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index, | ||||
bool &is_mem_assigned) { | bool &is_mem_assigned) { | ||||
if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) { | if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) { | ||||
std::string error = "Op" + FmtToStr(node->GetName()) + "'s output index" + FmtToStr(output_index) + | |||||
" is more than the size of node's AllOutDataAnchors."; | |||||
std::string error = | |||||
"Op:" + FmtToStr(node->GetName()) + "'s output index:" + FmtToStr(output_index) + | |||||
" is more than the size:" + FmtToStr(node->GetAllOutDataAnchors().size()) + " of node's AllOutDataAnchors."; | |||||
GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str()); | GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str()); | ||||
return ge::PARAM_INVALID; | return ge::PARAM_INVALID; | ||||
} | } | ||||
@@ -1010,8 +1109,11 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc | |||||
GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str()); | GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str()); | ||||
auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM); | auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM); | ||||
if (mem_type_iter == memory_offset_.end()) { | if (mem_type_iter == memory_offset_.end()) { | ||||
std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); | |||||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||||
REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], " | |||||
"not expected when AssignOrdinaryAtomicWorkspaceMemory, " | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]" | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
vector<int64_t> workspace_vector = op_desc->GetWorkspace(); | vector<int64_t> workspace_vector = op_desc->GetWorkspace(); | ||||
@@ -1032,8 +1134,9 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc | |||||
auto workspace_index = static_cast<uint64_t>(info_iter.first); | auto workspace_index = static_cast<uint64_t>(info_iter.first); | ||||
auto workspace_size = info_iter.second; | auto workspace_size = info_iter.second; | ||||
if (workspace_index >= workspace_vector.size()) { | if (workspace_index >= workspace_vector.size()) { | ||||
std::string error = "The workspace index" + FmtToStr(workspace_index) + | |||||
" is more than the size" + FmtToStr(workspace_vector.size()) + " of workspace vector."; | |||||
std::string error = "The workspace index:" + FmtToStr(workspace_index) + | |||||
" is more than the size:" + FmtToStr(workspace_vector.size()) + " of workspace vector in op:" + | |||||
op_desc->GetName().c_str(); | |||||
GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str()); | GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str()); | ||||
return ge::PARAM_INVALID; | return ge::PARAM_INVALID; | ||||
} | } | ||||
@@ -1063,8 +1166,11 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt | |||||
GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str()); | GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str()); | ||||
auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM); | auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM); | ||||
if (mem_type_iter == memory_offset_.end()) { | if (mem_type_iter == memory_offset_.end()) { | ||||
std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); | |||||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | |||||
REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], " | |||||
"not expected when AssignFusionAtomicWorkspaceMemory, " | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]" | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
map<string, map<int64_t, int64_t>> sub_node_workspace_offset; | map<string, map<int64_t, int64_t>> sub_node_workspace_offset; | ||||
@@ -1095,7 +1201,10 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt | |||||
sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset)); | sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset)); | ||||
} | } | ||||
if (!(op_desc->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, sub_node_workspace_offset))) { | if (!(op_desc->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, sub_node_workspace_offset))) { | ||||
GELOGE(FAILED, "Set EXT_ATTR_ATOMIC_WORKSPACE_OFFSET failed, op name:%s.", op_desc->GetName().c_str()); | |||||
REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for node:%s when AssignFusionAtomicWorkspaceMemory", | |||||
EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.c_str(), op_desc->GetName().c_str()); | |||||
GELOGE(FAILED, "[Set][Attr:%s]fail for node:%s.", | |||||
EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.c_str(), op_desc->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -1106,7 +1215,7 @@ Status GraphMemoryAssigner::CheckOffset() { | |||||
std::map<std::string, std::string> anchor_to_symbol; | std::map<std::string, std::string> anchor_to_symbol; | ||||
std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors; | std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors; | ||||
if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) { | if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) { | ||||
GELOGE(FAILED, "Get ref-mapping for graph %s failed.", compute_graph_->GetName().c_str()); | |||||
GELOGE(FAILED, "[Get][RefMapping]fail for graph %s", compute_graph_->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { | for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { | ||||
@@ -1148,7 +1257,6 @@ Status GraphMemoryAssigner::CheckOffset() { | |||||
std::string error = "Invalid workspace" + FmtToStr(ge::kInvalidOffset) + | std::string error = "Invalid workspace" + FmtToStr(ge::kInvalidOffset) + | ||||
+ " in node" + FmtToStr(node->GetName()); | + " in node" + FmtToStr(node->GetName()); | ||||
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); | ||||
GELOGE(FAILED, "Invalid workspace in node: %s workspace: %ld.", node->GetName().c_str(), ge::kInvalidOffset); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
} | } | ||||
@@ -1158,8 +1266,10 @@ Status GraphMemoryAssigner::CheckOffset() { | |||||
ge::Status GraphMemoryAssigner::SetInputOffset() { | ge::Status GraphMemoryAssigner::SetInputOffset() { | ||||
if (memory_offset_.empty()) { | if (memory_offset_.empty()) { | ||||
GELOGE(FAILED, "memory_offset_ is empty."); | |||||
return FAILED; | |||||
REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected when SetInputOffset, " | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
GELOGE(FAILED, "[Check][InnerData:memory_offset_]empty is not expected, " | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | |||||
} | } | ||||
for (auto pair : memory_offset_) { | for (auto pair : memory_offset_) { | ||||
GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), | GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), | ||||
@@ -1168,7 +1278,7 @@ ge::Status GraphMemoryAssigner::SetInputOffset() { | |||||
for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { | for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { | ||||
if (UpdateOpInputOffset(node) != ge::SUCCESS) { | if (UpdateOpInputOffset(node) != ge::SUCCESS) { | ||||
GELOGE(ge::FAILED, "Update op input offset failed"); | |||||
GELOGE(ge::FAILED, "[Update][Offset:Input]fail for op:%s", node->GetName().c_str()); | |||||
return ge::FAILED; | return ge::FAILED; | ||||
} | } | ||||
} | } | ||||
@@ -1316,12 +1426,12 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const { | |||||
} | } | ||||
} else if (node->GetType() == DATA_TYPE) { | } else if (node->GetType() == DATA_TYPE) { | ||||
if (UpdateConstArgsOffset(node, input_list) != SUCCESS) { | if (UpdateConstArgsOffset(node, input_list) != SUCCESS) { | ||||
GELOGE(FAILED, "Update data: %s args offset failed.", node->GetName().c_str()); | |||||
GELOGE(FAILED, "[Update][Offset:Input:Const]fail for node:%s ", node->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
} else { | } else { | ||||
if (UpdateOpInputOffset(node, input_list) != SUCCESS) { | if (UpdateOpInputOffset(node, input_list) != SUCCESS) { | ||||
GELOGE(FAILED, "Update node: %s input offset failed.", node->GetName().c_str()); | |||||
GELOGE(FAILED, "[Update][Offset:Input]fail for node:%s", node->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
} | } | ||||
@@ -1361,7 +1471,7 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in | |||||
peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str()); | peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str()); | ||||
if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { | if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { | ||||
if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size, memory_type) != SUCCESS) { | if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size, memory_type) != SUCCESS) { | ||||
GELOGE(FAILED, "Set atomic clean attr failed."); | |||||
GELOGE(FAILED, "[Set][AtomicCleanAttr]fail for node:%s", peer_out_node->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
} | } | ||||
@@ -1387,7 +1497,10 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve | |||||
(void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); | (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); | ||||
mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | ||||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), | GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), | ||||
GELOGE(FAILED, "SetListInt failed."); | |||||
REPORT_INNER_ERROR("E19999", "Set Attr:%s failed when SetAtomicCleanAttr, op_name:%s", | |||||
ATTR_NAME_AUTOMIC_ADD_START.c_str(), node_op_desc->GetName().c_str()); | |||||
GELOGE(FAILED, "[Set][Attr:%s]fail for op_name:%s", | |||||
ATTR_NAME_AUTOMIC_ADD_START.c_str(), node_op_desc->GetName().c_str()); | |||||
return FAILED); | return FAILED); | ||||
std::vector<int64_t> mem_size_vector; | std::vector<int64_t> mem_size_vector; | ||||
@@ -1395,7 +1508,10 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve | |||||
(void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); | (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); | ||||
mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | ||||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), | GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), | ||||
GELOGE(FAILED, "SetListInt failed."); | |||||
REPORT_INNER_ERROR("E19999", "Set Attr:%s failed when SetAtomicCleanAttr, op_name:%s", | |||||
ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.c_str(), node_op_desc->GetName().c_str()); | |||||
GELOGE(FAILED, "[Set][Attr:%s]fail for op_name:%s", | |||||
ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.c_str(), node_op_desc->GetName().c_str()); | |||||
return FAILED); | return FAILED); | ||||
std::stringstream ss; | std::stringstream ss; | ||||
@@ -1437,12 +1553,14 @@ ge::Status GraphMemoryAssigner::GetNodeListMemoryType(const vector<NodePtr> &nod | |||||
// In the dynamic batch scenario, the memory attributes of nodes are the same. | // In the dynamic batch scenario, the memory attributes of nodes are the same. | ||||
for (auto &n : nodes) { | for (auto &n : nodes) { | ||||
if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { | if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { | ||||
GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed.") | |||||
GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), | |||||
"[Get][MemType:input]fail for node:%s", n->GetName().c_str()) | |||||
break; | break; | ||||
} | } | ||||
if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { | if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { | ||||
GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed."); | |||||
GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), | |||||
"[Get][MemType:output]fail for node:%s", n->GetName().c_str()) | |||||
break; | break; | ||||
} | } | ||||
} | } | ||||
@@ -1478,7 +1596,7 @@ ge::Status GraphMemoryAssigner::GetNodeMemoryType(const NodePtr &node, int64_t & | |||||
} | } | ||||
if (!CheckContinuousMemType(mem_type_list)) { | if (!CheckContinuousMemType(mem_type_list)) { | ||||
GELOGE(FAILED, "Check continuous memory type failed."); | |||||
GELOGE(FAILED, "[Check][MemType:Continuous]fail for node:%s", node->GetName().c_str()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
// It is continuous memory and memory type is the same, so use the first memory. | // It is continuous memory and memory type is the same, so use the first memory. | ||||
@@ -1526,7 +1644,11 @@ ge::Status GraphMemoryAssigner::GetAllRef(const NodePtr &node, map<int32_t, int3 | |||||
if (node->GetInDataAnchor(reuse_in_index) != nullptr) { | if (node->GetInDataAnchor(reuse_in_index) != nullptr) { | ||||
out2ins.emplace(out_data_anchor->GetIdx(), reuse_in_index); | out2ins.emplace(out_data_anchor->GetIdx(), reuse_in_index); | ||||
} else { | } else { | ||||
GELOGE(FAILED, "Invalid reuse_input value %d on output %d of node %s, please check attr reuse_input", | |||||
REPORT_INNER_ERROR("E19999", "Invalid reuse_input value %d on output %d of node %s, " | |||||
"please check attr reuse_input", | |||||
reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str()); | |||||
GELOGE(FAILED, "[Check][Attr]Invalid reuse_input value %d on output %d of node %s, " | |||||
"please check attr reuse_input", | |||||
reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str()); | reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str()); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -1549,7 +1671,7 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly( | |||||
auto continuous_type = iter->second; | auto continuous_type = iter->second; | ||||
bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0); | bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0); | ||||
if (continuous_input) { | if (continuous_input) { | ||||
GELOGI("Node %s 's precursor node %s need assign continuous input memory, store node firstly.", | |||||
GELOGI("Node %s 's precursor node %s need assign continuous input memory, store node firstly", | |||||
input_continuous_node->GetName().c_str(), in_node->GetName().c_str()); | input_continuous_node->GetName().c_str(), in_node->GetName().c_str()); | ||||
return false; | return false; | ||||
} | } | ||||
@@ -1559,7 +1681,7 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly( | |||||
node_2_continuous_type.emplace(out_node, continuous_type); | node_2_continuous_type.emplace(out_node, continuous_type); | ||||
bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0); | bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0); | ||||
if (continuous_input) { | if (continuous_input) { | ||||
GELOGI("Node %s 's succeed node %s need assign continuous input memory, store node firstly.", | |||||
GELOGI("Node %s 's succeed node %s need assign continuous input memory, store node firstly", | |||||
input_continuous_node->GetName().c_str(), out_node->GetName().c_str()); | input_continuous_node->GetName().c_str(), out_node->GetName().c_str()); | ||||
return false; | return false; | ||||
} | } | ||||
@@ -1575,11 +1697,12 @@ ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(con | |||||
int64_t mem_clean_size = 0; | int64_t mem_clean_size = 0; | ||||
int64_t memory_type = RT_MEMORY_HBM; | int64_t memory_type = RT_MEMORY_HBM; | ||||
GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"), "Get node memory type failed."); | |||||
GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"), | |||||
"[Get][MemType]fail for node:%s", input_continuous_node->GetName().c_str()); | |||||
auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type, | auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type, | ||||
continuous_type, reverse_refresh); | continuous_type, reverse_refresh); | ||||
if (ret != ge::SUCCESS) { | if (ret != ge::SUCCESS) { | ||||
GELOGE(ret, "Assign continuous input memory failed!"); | |||||
GELOGE(ret, "[Assign][Memory:Input:continuous]fail for node:%s", input_continuous_node->GetName().c_str()); | |||||
return ret; | return ret; | ||||
} | } | ||||
@@ -1590,7 +1713,6 @@ ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(con | |||||
if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) { | if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) { | ||||
// check whether there is an atomic conflict between the current node and the peer out node | // check whether there is an atomic conflict between the current node and the peer out node | ||||
if (!CheckInputIsSupportAtomic(input_continuous_node)) { | if (!CheckInputIsSupportAtomic(input_continuous_node)) { | ||||
GELOGE(ge::FAILED, "There is an atomic conflict between the current node and the peer out node, not supported!"); | |||||
return ge::FAILED; | return ge::FAILED; | ||||
} | } | ||||
@@ -1602,7 +1724,7 @@ ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(con | |||||
if (peer_out_node->GetType() == ATOMICADDRCLEAN) { | if (peer_out_node->GetType() == ATOMICADDRCLEAN) { | ||||
ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type); | ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str()); | |||||
GELOGE(ret, "[Set][AtomicCleanAttr]fail for node:%s", peer_out_node->GetName().c_str()); | |||||
return ret; | return ret; | ||||
} | } | ||||
} | } | ||||
@@ -385,7 +385,7 @@ Status DataDumper::DumpRefOutput(const DataDumper::InnerDumpInfo &inner_dump_inf | |||||
Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) { | Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) { | ||||
const auto &output_descs = inner_dump_info.op->GetAllOutputsDesc(); | const auto &output_descs = inner_dump_info.op->GetAllOutputsDesc(); | ||||
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op); | |||||
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op); | |||||
if (output_descs.size() != output_addrs.size()) { | if (output_descs.size() != output_addrs.size()) { | ||||
GELOGE(PARAM_INVALID, "Invalid output desc addrs size %zu, op %s has %zu output desc.", output_addrs.size(), | GELOGE(PARAM_INVALID, "Invalid output desc addrs size %zu, op %s has %zu output desc.", output_addrs.size(), | ||||
inner_dump_info.op->GetName().c_str(), output_descs.size()); | inner_dump_info.op->GetName().c_str(), output_descs.size()); | ||||
@@ -436,7 +436,7 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump: | |||||
// else data, const or variable op | // else data, const or variable op | ||||
aicpu::dump::Output output; | aicpu::dump::Output output; | ||||
auto output_tensor = inner_dump_info.op->GetOutputDescPtr(inner_dump_info.output_anchor_index); | auto output_tensor = inner_dump_info.op->GetOutputDescPtr(inner_dump_info.output_anchor_index); | ||||
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op); | |||||
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op); | |||||
if (output_tensor == nullptr) { | if (output_tensor == nullptr) { | ||||
GELOGE(PARAM_INVALID, "output_tensor is null, index: %d, size: %zu.", inner_dump_info.output_anchor_index, | GELOGE(PARAM_INVALID, "output_tensor is null, index: %d, size: %zu.", inner_dump_info.output_anchor_index, | ||||
inner_dump_info.op->GetOutputsSize()); | inner_dump_info.op->GetOutputsSize()); | ||||
@@ -540,7 +540,7 @@ Status DataDumper::DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info | |||||
Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) { | Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) { | ||||
GELOGI("Start dump input"); | GELOGI("Start dump input"); | ||||
const auto &input_descs = inner_dump_info.op->GetAllInputsDesc(); | const auto &input_descs = inner_dump_info.op->GetAllInputsDesc(); | ||||
const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, inner_dump_info.op); | |||||
const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(*runtime_param_, inner_dump_info.op); | |||||
if (input_descs.size() != input_addrs.size()) { | if (input_descs.size() != input_addrs.size()) { | ||||
GELOGE(PARAM_INVALID, "Invalid input desc addrs size %zu, op %s has %zu input desc.", input_addrs.size(), | GELOGE(PARAM_INVALID, "Invalid input desc addrs size %zu, op %s has %zu input desc.", input_addrs.size(), | ||||
inner_dump_info.op->GetName().c_str(), input_descs.size()); | inner_dump_info.op->GetName().c_str(), input_descs.size()); | ||||
@@ -36,9 +36,21 @@ | |||||
namespace ge { | namespace ge { | ||||
class DataDumper { | class DataDumper { | ||||
public: | public: | ||||
DataDumper() : runtime_param_{} {} | |||||
explicit DataDumper(const RuntimeParam &rsh) : runtime_param_(rsh) {} | |||||
explicit DataDumper(RuntimeParam *rsh) | |||||
: model_name_(), | |||||
model_id_(0), | |||||
runtime_param_(rsh), | |||||
dev_mem_load_(nullptr), | |||||
dev_mem_unload_(nullptr), | |||||
op_list_(), | |||||
input_map_(), | |||||
load_flag_(false), | |||||
device_id_(0), | |||||
global_step_(0), | |||||
loop_per_iter_(0), | |||||
loop_cond_(0), | |||||
compute_graph_(nullptr), | |||||
ref_info_() {} | |||||
~DataDumper(); | ~DataDumper(); | ||||
@@ -93,10 +105,10 @@ class DataDumper { | |||||
// for inference data dump | // for inference data dump | ||||
std::string om_name_; | std::string om_name_; | ||||
uint32_t model_id_ = 0; | |||||
const RuntimeParam &runtime_param_; | |||||
void *dev_mem_load_ = nullptr; | |||||
void *dev_mem_unload_ = nullptr; | |||||
uint32_t model_id_; | |||||
RuntimeParam *runtime_param_; | |||||
void *dev_mem_load_; | |||||
void *dev_mem_unload_; | |||||
struct InnerDumpInfo; | struct InnerDumpInfo; | ||||
struct InnerInputMapping; | struct InnerInputMapping; | ||||
@@ -107,12 +119,12 @@ class DataDumper { | |||||
uint32_t end_graph_stream_id_ = 0; | uint32_t end_graph_stream_id_ = 0; | ||||
bool is_end_graph_ = false; | bool is_end_graph_ = false; | ||||
std::multimap<std::string, InnerInputMapping> input_map_; // release after DavinciModel::Init | std::multimap<std::string, InnerInputMapping> input_map_; // release after DavinciModel::Init | ||||
bool load_flag_ = false; | |||||
uint32_t device_id_ = 0; | |||||
uintptr_t global_step_ = 0; | |||||
uintptr_t loop_per_iter_ = 0; | |||||
uintptr_t loop_cond_ = 0; | |||||
ComputeGraphPtr compute_graph_ = nullptr; // release after DavinciModel::Init | |||||
bool load_flag_; | |||||
uint32_t device_id_; | |||||
uintptr_t global_step_; | |||||
uintptr_t loop_per_iter_; | |||||
uintptr_t loop_cond_; | |||||
ComputeGraphPtr compute_graph_; // release after DavinciModel::Init | |||||
std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init | std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init | ||||
void *l1_fusion_addr_ = nullptr; | void *l1_fusion_addr_ = nullptr; | ||||
@@ -31,6 +31,7 @@ | |||||
#include "common/scope_guard.h" | #include "common/scope_guard.h" | ||||
#include "common/thread_pool.h" | #include "common/thread_pool.h" | ||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
#include "framework/common/util.h" | |||||
#include "graph/common/ge_call_wrapper.h" | #include "graph/common/ge_call_wrapper.h" | ||||
#include "graph/compute_graph.h" | #include "graph/compute_graph.h" | ||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
@@ -184,7 +185,7 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener | |||||
last_execute_mode_(INITIALIZATION), | last_execute_mode_(INITIALIZATION), | ||||
session_id_(0), | session_id_(0), | ||||
device_id_(0), | device_id_(0), | ||||
maxDumpOpNum_(0), data_dumper_(runtime_param_), | |||||
maxDumpOpNum_(0), data_dumper_(&runtime_param_), | |||||
iterator_count_(0), | iterator_count_(0), | ||||
is_l1_fusion_enable_(false), | is_l1_fusion_enable_(false), | ||||
is_first_execute_(true) { | is_first_execute_(true) { | ||||
@@ -297,6 +298,11 @@ void DavinciModel::ReleaseTask() { | |||||
GE_CHK_STATUS(task->Release(), "Release task failed."); | GE_CHK_STATUS(task->Release(), "Release task failed."); | ||||
} | } | ||||
} | } | ||||
for (auto &item : label_goto_args_) { | |||||
GE_FREE_RT_LOG(item.second.first); | |||||
} | |||||
label_goto_args_.clear(); | |||||
} | } | ||||
Status DavinciModel::Assign(const GeModelPtr &ge_model) { | Status DavinciModel::Assign(const GeModelPtr &ge_model) { | ||||
@@ -654,12 +660,12 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
runtime_param_.graph_id = compute_graph->GetGraphID(); | runtime_param_.graph_id = compute_graph->GetGraphID(); | ||||
// op debug register | // op debug register | ||||
GE_CHK_STATUS_RET(OpDebugRegister(), "OpDebugRegister failed."); | |||||
GE_CHK_STATUS_RET(OpDebugRegister(), "OpDebugRegister failed"); | |||||
GE_TIMESTAMP_START(TransAllVarData); | GE_TIMESTAMP_START(TransAllVarData); | ||||
GE_CHK_STATUS_RET(TransAllVarData(compute_graph, runtime_param_.graph_id), "TransAllVarData failed."); | |||||
GE_CHK_STATUS_RET(TransAllVarData(compute_graph, runtime_param_.graph_id), "TransAllVarData failed"); | |||||
GE_TIMESTAMP_END(TransAllVarData, "GraphLoader::TransAllVarData"); | GE_TIMESTAMP_END(TransAllVarData, "GraphLoader::TransAllVarData"); | ||||
GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(compute_graph, session_id_, device_id_), "copy var data failed."); | |||||
GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(compute_graph, session_id_, device_id_), "copy var data failed"); | |||||
GE_TIMESTAMP_START(InitModelMem); | GE_TIMESTAMP_START(InitModelMem); | ||||
GELOGD("Known node is %d.", known_node_); | GELOGD("Known node is %d.", known_node_); | ||||
@@ -667,7 +673,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
if (!known_node_) { | if (!known_node_) { | ||||
GE_CHK_STATUS_RET_NOLOG(InitFeatureMapAndP2PMem(dev_ptr, mem_size)); | GE_CHK_STATUS_RET_NOLOG(InitFeatureMapAndP2PMem(dev_ptr, mem_size)); | ||||
data_inputer_ = new (std::nothrow) DataInputer(); | data_inputer_ = new (std::nothrow) DataInputer(); | ||||
GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr."); | |||||
GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr"); | |||||
} | } | ||||
fixed_mem_base_ = reinterpret_cast<uintptr_t>(mem_base_); | fixed_mem_base_ = reinterpret_cast<uintptr_t>(mem_base_); | ||||
GE_TIMESTAMP_END(InitModelMem, "GraphLoader::InitModelMem"); | GE_TIMESTAMP_END(InitModelMem, "GraphLoader::InitModelMem"); | ||||
@@ -1334,6 +1340,39 @@ void DavinciModel::ParseDynamicOutShape(const std::vector<std::string> &str_info | |||||
} | } | ||||
} | } | ||||
Status DavinciModel::GetLabelGotoAddr(uint32_t label_index, rtMemType_t mem_type, void *&arg_addr, uint32_t &arg_size) { | |||||
std::lock_guard<std::mutex> lock(label_args_mutex_); | |||||
auto it = label_goto_args_.find(label_index); | |||||
if (it != label_goto_args_.end()) { | |||||
arg_addr = it->second.first; | |||||
arg_size = it->second.second; | |||||
return SUCCESS; | |||||
} | |||||
if (label_index >= label_list_.size()) { | |||||
GELOGE(INTERNAL_ERROR, "Invalid label id:%u, label size:%zu", label_index, label_list_.size()); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
GE_CHECK_NOTNULL(label_list_[label_index]); | |||||
vector<rtLabel_t> label_used = { label_list_[label_index] }; | |||||
arg_size = label_used.size() * sizeof(rtLabelDevInfo); | |||||
rtError_t rt_ret = rtMalloc(&arg_addr, arg_size, mem_type); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | |||||
label_goto_args_[label_index] = { arg_addr, arg_size }; | |||||
rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), arg_addr, arg_size); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rtLabelListCpy failed, error: %#x", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief LabelSet Op Initialize. | /// @brief LabelSet Op Initialize. | ||||
/// @param [in] op_desc: LabelSet Op descriptor. | /// @param [in] op_desc: LabelSet Op descriptor. | ||||
@@ -273,6 +273,8 @@ class DavinciModel { | |||||
const vector<rtLabel_t> &GetLabelList() const { return label_list_; } | const vector<rtLabel_t> &GetLabelList() const { return label_list_; } | ||||
Status GetLabelGotoAddr(uint32_t label_index, rtMemType_t memory_type, void *&addr, uint32_t &size); | |||||
Status DestroyThread(); | Status DestroyThread(); | ||||
// get Op | // get Op | ||||
@@ -930,6 +932,9 @@ class DavinciModel { | |||||
vector<rtLabel_t> label_list_; | vector<rtLabel_t> label_list_; | ||||
set<uint32_t> label_id_indication_; | set<uint32_t> label_id_indication_; | ||||
mutex label_args_mutex_; | |||||
map<uint32_t, pair<void *, uint32_t>> label_goto_args_; | |||||
mutex outside_addrs_mutex_; | mutex outside_addrs_mutex_; | ||||
vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr. | vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr. | ||||
set<const void *> copy_only_addrs_; // Address need copy to original place. | set<const void *> copy_only_addrs_; // Address need copy to original place. | ||||
@@ -297,12 +297,11 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge | |||||
if (model_id == INVALID_MODEL_ID) { | if (model_id == INVALID_MODEL_ID) { | ||||
GenModelId(&model_id); | GenModelId(&model_id); | ||||
} | } | ||||
bool is_shape_unknown = false; | |||||
auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||||
string model_name = ""; | string model_name = ""; | ||||
GE_CHK_STATUS_RET(ge_root_model->CheckIsUnknownShape(is_shape_unknown), "CheckIsUnknownShape failed, model id:%u", | |||||
model_id); | |||||
if (is_shape_unknown || GetContext().GetHostExecFlag()) { | |||||
bool is_shape_unknown = ge_root_model->GetRootGraph()->GetGraphUnknownFlag(); | |||||
// if multi subgraph is known, do hybrid load process | |||||
if (is_shape_unknown || GetContext().GetHostExecFlag() || (name_to_model.size() > 1)) { | |||||
return DoLoadHybridModelOnline(model_id, model_name, ge_root_model, listener); | return DoLoadHybridModelOnline(model_id, model_name, ge_root_model, listener); | ||||
} | } | ||||
@@ -324,7 +323,6 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge | |||||
auto root_graph = ge_root_model->GetRootGraph(); | auto root_graph = ge_root_model->GetRootGraph(); | ||||
GE_CHECK_NOTNULL(root_graph); | GE_CHECK_NOTNULL(root_graph); | ||||
string root_model_name = root_graph->GetName(); | string root_model_name = root_graph->GetName(); | ||||
auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||||
GeModelPtr ge_model = name_to_model[root_model_name]; | GeModelPtr ge_model = name_to_model[root_model_name]; | ||||
Status ret = SUCCESS; | Status ret = SUCCESS; | ||||
do { | do { | ||||
@@ -17,9 +17,15 @@ | |||||
#include "graph/load/model_manager/task_info/label_goto_ex_task_info.h" | #include "graph/load/model_manager/task_info/label_goto_ex_task_info.h" | ||||
#include "graph/load/model_manager/davinci_model.h" | #include "graph/load/model_manager/davinci_model.h" | ||||
#include "graph/debug/ge_attr_define.h" | |||||
namespace ge { | namespace ge { | ||||
constexpr uint8_t kGotoBranchMax = 1; | |||||
LabelGotoExTaskInfo::~LabelGotoExTaskInfo() { | |||||
args_ = nullptr; | |||||
GE_FREE_RT_LOG(index_value_); | |||||
} | |||||
Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | ||||
GELOGI("LabelGotoExTaskInfo Init Start."); | GELOGI("LabelGotoExTaskInfo Init Start."); | ||||
GE_CHECK_NOTNULL(davinci_model); | GE_CHECK_NOTNULL(davinci_model); | ||||
@@ -28,7 +34,7 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
// Get LabelGoto task def | |||||
// Get LabelGotoEx task def | |||||
const domi::LabelGotoExDef &label_goto = task_def.label_goto_ex(); | const domi::LabelGotoExDef &label_goto = task_def.label_goto_ex(); | ||||
OpDescPtr op_desc = davinci_model->GetOpByIndex(label_goto.op_index()); | OpDescPtr op_desc = davinci_model->GetOpByIndex(label_goto.op_index()); | ||||
if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
@@ -43,20 +49,38 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
const vector<rtLabel_t> &label_list = davinci_model->GetLabelList(); | |||||
if (label_index >= label_list.size()) { | |||||
GELOGE(PARAM_INVALID, "LabelGotoExTaskInfo: Invalid label id:%u, label size:%zu", label_index, label_list.size()); | |||||
return INTERNAL_ERROR; | |||||
rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? RT_MEMORY_TS_4G : RT_MEMORY_HBM; | |||||
GELOGI("memory_type: %u", memory_type); | |||||
GE_CHK_STATUS_RET_NOLOG(davinci_model->GetLabelGotoAddr(label_index, memory_type, args_, args_size_)); | |||||
rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), memory_type); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
label_ = label_list[label_index]; | |||||
GELOGI("LabelGotoExTaskInfo Init Success, label id:%u, label:%p.", label_index, label_); | |||||
uint64_t branch_index = 0; | |||||
rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &branch_index, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rtMemcpy failed, error: %#x", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | |||||
GELOGI("LabelGotoExTaskInfo Init Success, label id:%u", label_index); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status LabelGotoExTaskInfo::Distribute() { | Status LabelGotoExTaskInfo::Distribute() { | ||||
GELOGI("LabelGotoExTaskInfo Distribute Start."); | GELOGI("LabelGotoExTaskInfo Distribute Start."); | ||||
rtError_t rt_ret = rtLabelGotoEx(label_, stream_); | |||||
GE_CHECK_NOTNULL(args_); | |||||
GE_CHECK_NOTNULL(index_value_); | |||||
if (args_size_ == 0) { | |||||
GELOGE(PARAM_INVALID, "branch max: %u, args size: %u invalid.", kGotoBranchMax, args_size_); | |||||
return PARAM_INVALID; | |||||
} | |||||
rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, kGotoBranchMax, args_, stream_); | |||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
@@ -14,24 +14,26 @@ | |||||
* limitations under the License. | * limitations under the License. | ||||
*/ | */ | ||||
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ | |||||
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ | |||||
#ifndef GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ | |||||
#define GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ | |||||
#include "graph/load/model_manager/task_info/task_info.h" | #include "graph/load/model_manager/task_info/task_info.h" | ||||
namespace ge { | namespace ge { | ||||
class LabelGotoExTaskInfo : public TaskInfo { | class LabelGotoExTaskInfo : public TaskInfo { | ||||
public: | public: | ||||
LabelGotoExTaskInfo() : label_(nullptr) {} | |||||
LabelGotoExTaskInfo() = default; | |||||
~LabelGotoExTaskInfo() override { label_ = nullptr; } | |||||
~LabelGotoExTaskInfo() override; | |||||
Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | ||||
Status Distribute() override; | Status Distribute() override; | ||||
private: | private: | ||||
void *label_; | |||||
void *index_value_{nullptr}; // switch index input. | |||||
void *args_{nullptr}; // label info memory. | |||||
uint32_t args_size_{0}; // label info length. | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ | |||||
#endif // GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_ |
@@ -14,8 +14,8 @@ | |||||
* limitations under the License. | * limitations under the License. | ||||
*/ | */ | ||||
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ | |||||
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ | |||||
#ifndef GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ | |||||
#define GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ | |||||
#include "graph/load/model_manager/task_info/task_info.h" | #include "graph/load/model_manager/task_info/task_info.h" | ||||
@@ -34,4 +34,4 @@ class LabelSetTaskInfo : public TaskInfo { | |||||
void *label_; | void *label_; | ||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ | |||||
#endif // GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_ |
@@ -16,20 +16,13 @@ | |||||
#include "graph/load/model_manager/task_info/label_switch_by_index_task_info.h" | #include "graph/load/model_manager/task_info/label_switch_by_index_task_info.h" | ||||
#include "graph/debug/ge_attr_define.h" | |||||
#include "graph/load/model_manager/davinci_model.h" | #include "graph/load/model_manager/davinci_model.h" | ||||
namespace ge { | namespace ge { | ||||
constexpr uint8_t kLabelSwitchIndexNum = 1; | constexpr uint8_t kLabelSwitchIndexNum = 1; | ||||
LabelSwitchByIndexTaskInfo::~LabelSwitchByIndexTaskInfo() { | LabelSwitchByIndexTaskInfo::~LabelSwitchByIndexTaskInfo() { | ||||
if (args_ != nullptr) { | |||||
rtError_t ret = rtFree(args_); | |||||
if (ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret); | |||||
} | |||||
} | |||||
args_ = nullptr; | |||||
GE_FREE_RT_LOG(args_); | |||||
index_value_ = nullptr; | index_value_ = nullptr; | ||||
} | } | ||||
@@ -37,13 +30,12 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo | |||||
GELOGI("LabelSwitchByIndexTaskInfo Init Start."); | GELOGI("LabelSwitchByIndexTaskInfo Init Start."); | ||||
GE_CHECK_NOTNULL(davinci_model); | GE_CHECK_NOTNULL(davinci_model); | ||||
const vector<rtLabel_t> &label_list = davinci_model->GetLabelList(); | |||||
Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList()); | Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList()); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
// Get LabelSwitch task def | |||||
// Get LabelSwitchByIndex task def | |||||
const domi::LabelSwitchByIndexDef &label_switch = task_def.label_switch_by_index(); | const domi::LabelSwitchByIndexDef &label_switch = task_def.label_switch_by_index(); | ||||
OpDescPtr op_desc = davinci_model->GetOpByIndex(label_switch.op_index()); | OpDescPtr op_desc = davinci_model->GetOpByIndex(label_switch.op_index()); | ||||
if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
@@ -68,7 +60,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo | |||||
davinci_model->DisableZeroCopy(index_value_); | davinci_model->DisableZeroCopy(index_value_); | ||||
std::vector<uint32_t> label_idx_list; | |||||
vector<uint32_t> label_idx_list; | |||||
if (!AttrUtils::GetListInt(op_desc, ATTR_NAME_LABEL_SWITCH_LIST, label_idx_list)) { | if (!AttrUtils::GetListInt(op_desc, ATTR_NAME_LABEL_SWITCH_LIST, label_idx_list)) { | ||||
GELOGE(INTERNAL_ERROR, "LabelSwitchByIndexTaskInfo: %s Get attr %s failed.", op_desc->GetName().c_str(), | GELOGE(INTERNAL_ERROR, "LabelSwitchByIndexTaskInfo: %s Get attr %s failed.", op_desc->GetName().c_str(), | ||||
ATTR_NAME_LABEL_SWITCH_LIST.c_str()); | ATTR_NAME_LABEL_SWITCH_LIST.c_str()); | ||||
@@ -81,7 +73,8 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
label_list_.resize(branch_max_, nullptr); | |||||
vector<rtLabel_t> label_used(branch_max_, nullptr); | |||||
const vector<rtLabel_t> &label_list = davinci_model->GetLabelList(); | |||||
for (size_t idx = 0; idx < label_idx_list.size(); ++idx) { | for (size_t idx = 0; idx < label_idx_list.size(); ++idx) { | ||||
uint32_t label_id = label_idx_list[idx]; | uint32_t label_id = label_idx_list[idx]; | ||||
if (label_id >= label_list.size()) { | if (label_id >= label_list.size()) { | ||||
@@ -90,8 +83,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
GE_CHECK_NOTNULL(label_list[label_id]); | GE_CHECK_NOTNULL(label_list[label_id]); | ||||
label_list_[idx] = label_list[label_id]; | |||||
label_used[idx] = label_list[label_id]; | |||||
} | } | ||||
rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? RT_MEMORY_TS_4G : RT_MEMORY_HBM; | rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? RT_MEMORY_TS_4G : RT_MEMORY_HBM; | ||||
@@ -103,7 +95,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
} | } | ||||
rt_ret = rtLabelListCpy(label_list_.data(), label_list_.size(), args_, args_size_); | |||||
rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), args_, args_size_); | |||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
@@ -125,7 +117,7 @@ Status LabelSwitchByIndexTaskInfo::Distribute() { | |||||
rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, branch_max_, args_, stream_); | rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, branch_max_, args_, stream_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GELOGI("LabelSwitchByIndexTaskInfo Distribute Success."); | GELOGI("LabelSwitchByIndexTaskInfo Distribute Success."); | ||||
@@ -14,16 +14,15 @@ | |||||
* limitations under the License. | * limitations under the License. | ||||
*/ | */ | ||||
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ | |||||
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ | |||||
#ifndef GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ | |||||
#define GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ | |||||
#include "graph/load/model_manager/task_info/task_info.h" | #include "graph/load/model_manager/task_info/task_info.h" | ||||
namespace ge { | namespace ge { | ||||
class LabelSwitchByIndexTaskInfo : public TaskInfo { | class LabelSwitchByIndexTaskInfo : public TaskInfo { | ||||
public: | public: | ||||
LabelSwitchByIndexTaskInfo() | |||||
: index_value_(nullptr), branch_max_(0), args_(nullptr), args_size_(0), fixed_addr_offset_(0) {} | |||||
LabelSwitchByIndexTaskInfo() = default; | |||||
~LabelSwitchByIndexTaskInfo() override; | ~LabelSwitchByIndexTaskInfo() override; | ||||
@@ -34,12 +33,11 @@ class LabelSwitchByIndexTaskInfo : public TaskInfo { | |||||
Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | ||||
private: | private: | ||||
void *index_value_; // switch index input. | |||||
uint32_t branch_max_; // max branch count. | |||||
void *args_; // label info memory. | |||||
uint32_t args_size_; // label info length. | |||||
std::vector<rtLabel_t> label_list_; | |||||
int64_t fixed_addr_offset_; | |||||
void *index_value_{nullptr}; // switch index input. | |||||
uint32_t branch_max_{0}; // max branch count. | |||||
void *args_{nullptr}; // label info memory. | |||||
uint32_t args_size_{0}; // label info length. | |||||
int64_t fixed_addr_offset_{0}; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ | |||||
#endif // GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_ |
@@ -40,7 +40,7 @@ static bool BlockComparator(const Block *left, const Block *right) { | |||||
} | } | ||||
bool CanMerge(Block *block) { | bool CanMerge(Block *block) { | ||||
if (block == nullptr || block->allocated || !block->IsSplit()) { | |||||
if ((block == nullptr) || block->allocated || !block->IsSplit()) { | |||||
return false; | return false; | ||||
} | } | ||||
return true; | return true; | ||||
@@ -52,7 +52,7 @@ size_t GetBinIndex(size_t size) { | |||||
if (size <= range) { | if (size <= range) { | ||||
break; | break; | ||||
} | } | ||||
++index; | |||||
index++; | |||||
} | } | ||||
if (index > kNumBins - 1) { | if (index > kNumBins - 1) { | ||||
index = kNumBins - 1; | index = kNumBins - 1; | ||||
@@ -87,15 +87,15 @@ bool ShouldSplit(const Block *block, size_t size) { | |||||
void IncreaseCount(std::map<size_t, size_t> &count, size_t size) { | void IncreaseCount(std::map<size_t, size_t> &count, size_t size) { | ||||
auto it = count.find(size); | auto it = count.find(size); | ||||
if (it != count.end()) { | |||||
it->second++; | |||||
} else { | |||||
if (it == count.end()) { | |||||
count.emplace(size, 1); | count.emplace(size, 1); | ||||
} else { | |||||
it->second++; | |||||
} | } | ||||
} | } | ||||
CachingAllocator::CachingAllocator(rtMemType_t memory_type) : memory_type_(memory_type), memory_allocator_(nullptr) { | CachingAllocator::CachingAllocator(rtMemType_t memory_type) : memory_type_(memory_type), memory_allocator_(nullptr) { | ||||
for (uint32_t i = 0; i < kNumBins; ++i) { | |||||
for (uint32_t i = 0; i < kNumBins; i++) { | |||||
free_block_bins_[i] = nullptr; | free_block_bins_[i] = nullptr; | ||||
} | } | ||||
} | } | ||||
@@ -105,7 +105,7 @@ Status CachingAllocator::Initialize(uint32_t device_id) { | |||||
// when redo Initialize free old memory | // when redo Initialize free old memory | ||||
FreeBlocks(); | FreeBlocks(); | ||||
std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
for (uint32_t i = 0; i < kNumBins; ++i) { | |||||
for (uint32_t i = 0; i < kNumBins; i++) { | |||||
if (free_block_bins_[i] != nullptr) { | if (free_block_bins_[i] != nullptr) { | ||||
continue; | continue; | ||||
} | } | ||||
@@ -132,18 +132,18 @@ void CachingAllocator::Finalize(uint32_t device_id) { | |||||
uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) { | uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) { | ||||
GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id); | GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id); | ||||
uint8_t *ptr = nullptr; | |||||
size = GetBlockSize(size); | size = GetBlockSize(size); | ||||
uint8_t *ptr = nullptr; | |||||
Block *block = FindFreeBlock(size, org_ptr, device_id); | Block *block = FindFreeBlock(size, org_ptr, device_id); | ||||
if (block != nullptr) { | |||||
ptr = block->ptr; | |||||
} else { | |||||
if (block == nullptr) { | |||||
if (ge::SUCCESS == TryExtendCache(size, device_id)) { | if (ge::SUCCESS == TryExtendCache(size, device_id)) { | ||||
block = FindFreeBlock(size, org_ptr, device_id); | block = FindFreeBlock(size, org_ptr, device_id); | ||||
if (block != nullptr) { | if (block != nullptr) { | ||||
ptr = block->ptr; | ptr = block->ptr; | ||||
} | } | ||||
} | } | ||||
} else { | |||||
ptr = block->ptr; | |||||
} | } | ||||
if (ptr == nullptr) { | if (ptr == nullptr) { | ||||
GELOGE(FAILED, "Malloc failed device id = %u, size= %zu", device_id, size); | GELOGE(FAILED, "Malloc failed device id = %u, size= %zu", device_id, size); | ||||
@@ -171,7 +171,7 @@ Status CachingAllocator::Free(uint8_t *ptr, uint32_t device_id) { | |||||
} | } | ||||
void CachingAllocator::FreeBlock(Block *block) { | void CachingAllocator::FreeBlock(Block *block) { | ||||
if (block == nullptr || !block->allocated) { | |||||
if ((block == nullptr) || !block->allocated) { | |||||
return; | return; | ||||
} | } | ||||
GELOGI("Free block size = %zu", block->size); | GELOGI("Free block size = %zu", block->size); | ||||
@@ -187,7 +187,7 @@ void CachingAllocator::FreeBlock(Block *block) { | |||||
} | } | ||||
void CachingAllocator::MergeBlocks(Block *dst, Block *src, BlockBin &bin) { | void CachingAllocator::MergeBlocks(Block *dst, Block *src, BlockBin &bin) { | ||||
if (!CanMerge(dst) || !CanMerge(src)) { | |||||
if (!CanMerge(src) || !CanMerge(dst)) { | |||||
return; | return; | ||||
} | } | ||||
@@ -316,7 +316,7 @@ size_t CachingAllocator::FreeCachedBlocks() { | |||||
GELOGI("Free cached blocks"); | GELOGI("Free cached blocks"); | ||||
std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
size_t free_cached_memory_size = 0; | size_t free_cached_memory_size = 0; | ||||
for (uint32_t i = 0; i < kNumBins; ++i) { | |||||
for (uint32_t i = 0; i < kNumBins; i++) { | |||||
auto pool = free_block_bins_[i]; | auto pool = free_block_bins_[i]; | ||||
if (pool == nullptr) { | if (pool == nullptr) { | ||||
continue; | continue; | ||||
@@ -324,7 +324,8 @@ size_t CachingAllocator::FreeCachedBlocks() { | |||||
for (auto it = pool->begin(); it != pool->end();) { | for (auto it = pool->begin(); it != pool->end();) { | ||||
Block *block = *it; | Block *block = *it; | ||||
// free block memory that has not been split | // free block memory that has not been split | ||||
if ((block != nullptr) && (block->ptr != nullptr) && (block->prev == nullptr) && (block->next == nullptr) && | |||||
if ((block != nullptr) && (block->ptr != nullptr) && | |||||
(block->prev == nullptr) && (block->next == nullptr) && | |||||
(memory_allocator_->FreeMemory(block->ptr) == ge::SUCCESS)) { | (memory_allocator_->FreeMemory(block->ptr) == ge::SUCCESS)) { | ||||
auto itcount = malloced_memory_.find(block->size); | auto itcount = malloced_memory_.find(block->size); | ||||
free_cached_memory_size += block->size; | free_cached_memory_size += block->size; | ||||
@@ -345,7 +346,7 @@ size_t CachingAllocator::FreeCachedBlocks() { | |||||
} | } | ||||
void CachingAllocator::FreeBlocks() { | void CachingAllocator::FreeBlocks() { | ||||
GELOGI("Free blocks"); | |||||
GELOGI("Free blocks."); | |||||
std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
// free allocated blocks and put to cache | // free allocated blocks and put to cache | ||||
for (auto &it : allocated_blocks_) { | for (auto &it : allocated_blocks_) { | ||||
@@ -356,9 +357,9 @@ void CachingAllocator::FreeBlocks() { | |||||
} | } | ||||
void CachingAllocator::FreeBlockBins() { | void CachingAllocator::FreeBlockBins() { | ||||
GELOGI("Free block bins"); | |||||
GELOGI("Free block bins."); | |||||
std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
for (uint32_t i = 0; i < kNumBins; ++i) { | |||||
for (uint32_t i = 0; i < kNumBins; i++) { | |||||
if (free_block_bins_[i] != nullptr) { | if (free_block_bins_[i] != nullptr) { | ||||
delete free_block_bins_[i]; | delete free_block_bins_[i]; | ||||
free_block_bins_[i] = nullptr; | free_block_bins_[i] = nullptr; | ||||
@@ -367,9 +368,9 @@ void CachingAllocator::FreeBlockBins() { | |||||
} | } | ||||
void PrintCount(std::map<size_t, size_t> &count, const std::string &name, size_t total_size, size_t total_count) { | void PrintCount(std::map<size_t, size_t> &count, const std::string &name, size_t total_size, size_t total_count) { | ||||
GELOGI("%6s total[size:%10zu count:%10zu]", name.c_str(), total_size, total_count); | |||||
GELOGI("%6s total[size:%10zu count:%10zu].", name.c_str(), total_size, total_count); | |||||
for (auto &it : count) { | for (auto &it : count) { | ||||
GELOGI(" |- block[size:%10zu count:%10zu]", it.first, it.second); | |||||
GELOGI(" |- block[size:%10zu count:%10zu].", it.first, it.second); | |||||
} | } | ||||
} | } | ||||
@@ -383,20 +384,20 @@ void CachingAllocator::PrintStatics() { | |||||
size_t total_free_count = 0; | size_t total_free_count = 0; | ||||
size_t total_malloc_size = 0; | size_t total_malloc_size = 0; | ||||
size_t total_malloc_count = 0; | size_t total_malloc_count = 0; | ||||
std::map<size_t, size_t> using_block; | |||||
std::map<size_t, size_t> free_block; | |||||
std::map<size_t, size_t> malloc_block; | |||||
std::map<size_t, size_t> using_block_stat; | |||||
std::map<size_t, size_t> free_block_stat; | |||||
std::map<size_t, size_t> malloc_block_stat; | |||||
do { | do { | ||||
std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
for (uint32_t i = 0; i < kNumBins; ++i) { | |||||
for (uint32_t i = 0; i < kNumBins; i++) { | |||||
auto pool = free_block_bins_[i]; | auto pool = free_block_bins_[i]; | ||||
if (pool == nullptr) { | if (pool == nullptr) { | ||||
continue; | continue; | ||||
} | } | ||||
for (auto it = pool->begin(); it != pool->end(); ++it) { | |||||
for (auto it = pool->begin(); it != pool->end(); it++) { | |||||
if ((*it) != nullptr) { | if ((*it) != nullptr) { | ||||
total_free_size += (*it)->size; | total_free_size += (*it)->size; | ||||
IncreaseCount(free_block, (*it)->size); | |||||
IncreaseCount(free_block_stat, (*it)->size); | |||||
total_free_count++; | total_free_count++; | ||||
} | } | ||||
} | } | ||||
@@ -405,7 +406,7 @@ void CachingAllocator::PrintStatics() { | |||||
for (auto &it : allocated_blocks_) { | for (auto &it : allocated_blocks_) { | ||||
if (it.second != nullptr) { | if (it.second != nullptr) { | ||||
total_using_size += it.second->size; | total_using_size += it.second->size; | ||||
IncreaseCount(using_block, it.second->size); | |||||
IncreaseCount(using_block_stat, it.second->size); | |||||
total_using_count++; | total_using_count++; | ||||
} | } | ||||
} | } | ||||
@@ -413,12 +414,12 @@ void CachingAllocator::PrintStatics() { | |||||
for (auto &it : malloced_memory_) { | for (auto &it : malloced_memory_) { | ||||
total_malloc_size += it.first * it.second; | total_malloc_size += it.first * it.second; | ||||
total_malloc_count += it.second; | total_malloc_count += it.second; | ||||
malloc_block[it.first] = it.second; | |||||
malloc_block_stat[it.first] = it.second; | |||||
} | } | ||||
} while (0); | } while (0); | ||||
PrintCount(malloc_block, "Malloc", total_malloc_size, total_malloc_count); | |||||
PrintCount(using_block, "Using", total_using_size, total_using_count); | |||||
PrintCount(free_block, "Free", total_free_size, total_free_count); | |||||
PrintCount(malloc_block_stat, "Malloc", total_malloc_size, total_malloc_count); | |||||
PrintCount(using_block_stat, "Using", total_using_size, total_using_count); | |||||
PrintCount(free_block_stat, "Free", total_free_size, total_free_count); | |||||
} | } | ||||
} // namespace ge | } // namespace ge |
@@ -359,7 +359,10 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||||
std::shared_ptr<Graph> graph_ptr = MakeShared<ge::Graph>(graph); | std::shared_ptr<Graph> graph_ptr = MakeShared<ge::Graph>(graph); | ||||
GE_IF_BOOL_EXEC(graph_ptr == nullptr, GELOGE(FAILED, "GraphPtr make shared failed"); | GE_IF_BOOL_EXEC(graph_ptr == nullptr, GELOGE(FAILED, "GraphPtr make shared failed"); | ||||
return FAILED); | return FAILED); | ||||
// update option about tuning graph | |||||
ParseOption(options, BUILD_MODE, options_.build_mode); | |||||
ParseOption(options, BUILD_STEP, options_.build_step); | |||||
ParseOption(options, TUNING_PATH, options_.tuning_path); | |||||
graph_node->SetGraph(graph_ptr); | graph_node->SetGraph(graph_ptr); | ||||
graph_node->SetOptions(options); | graph_node->SetOptions(options); | ||||
AddGraphNode(graph_id, graph_node); | AddGraphNode(graph_id, graph_node); | ||||
@@ -433,6 +436,10 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap | |||||
GELOGE(FAILED, "GraphPtr make shared failed"); | GELOGE(FAILED, "GraphPtr make shared failed"); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
// update option about tuning graph | |||||
ParseOption(options, BUILD_MODE, options_.build_mode); | |||||
ParseOption(options, BUILD_STEP, options_.build_step); | |||||
ParseOption(options, TUNING_PATH, options_.tuning_path); | |||||
graph_node->SetGraph(graph_ptr); | graph_node->SetGraph(graph_ptr); | ||||
graph_node->SetOptions(options); | graph_node->SetOptions(options); | ||||
@@ -1466,6 +1473,10 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti | |||||
GE_IF_BOOL_EXEC(ret != SUCCESS, | GE_IF_BOOL_EXEC(ret != SUCCESS, | ||||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.compressFlag value is invalid, must be 0 or 1."); | GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.compressFlag value is invalid, must be 0 or 1."); | ||||
return GE_GRAPH_OPTIONS_INVALID); | return GE_GRAPH_OPTIONS_INVALID); | ||||
// Set Build model and step | |||||
ParseOption(options, BUILD_MODE, options_.build_mode); | |||||
ParseOption(options, BUILD_STEP, options_.build_step); | |||||
ParseOption(options, BUILD_STEP, options_.tuning_path); | |||||
// ge.graphType. | // ge.graphType. | ||||
options_.run_graph_flag = true; | options_.run_graph_flag = true; | ||||
@@ -1514,10 +1525,6 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti | |||||
GELOGD("Dynamic dims params: input shape is %s, dynamic dims is %s, dynamic node type is %d", | GELOGD("Dynamic dims params: input shape is %s, dynamic dims is %s, dynamic node type is %d", | ||||
options_.input_shape.c_str(), options_.dynamic_dims.c_str(), options_.dynamic_node_type); | options_.input_shape.c_str(), options_.dynamic_dims.c_str(), options_.dynamic_node_type); | ||||
// Set Build model and step | |||||
ParseOption(options, BUILD_MODE, options_.build_mode); | |||||
ParseOption(options, BUILD_STEP, options_.build_step); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -1549,6 +1556,7 @@ void GraphManager::ParseOption(const std::map<std::string, std::string> &options | |||||
std::string &option) { | std::string &option) { | ||||
auto iter = options.find(key); | auto iter = options.find(key); | ||||
if (iter != options.end()) { | if (iter != options.end()) { | ||||
GELOGD("Set option %s from value %s to value%s", key.c_str(), option.c_str(), iter->second.c_str()); | |||||
option = iter->second; | option = iter->second; | ||||
} | } | ||||
} | } | ||||
@@ -3132,6 +3140,21 @@ Status GraphManager::ConvertGraphToFile(ComputeGraphPtr &compute_graph, GraphPar | |||||
non_tuning_subgraphs.push_back(sub_graph_tmp); | non_tuning_subgraphs.push_back(sub_graph_tmp); | ||||
} | } | ||||
} | } | ||||
// for function graphs to tune | |||||
for (auto &function_graph : compute_graph->GetAllSubgraphs()) { | |||||
auto subgraph_list = sub_graph_map[function_graph]; | |||||
for (const auto &sub_graph_info_ptr : subgraph_list) { | |||||
GE_CHECK_NOTNULL(sub_graph_info_ptr); | |||||
ComputeGraphPtr sub_graph_tmp = sub_graph_info_ptr->GetSubGraph(); | |||||
// need to tuning | |||||
if (sub_graph_info_ptr->GetEngineName() == kVectorEngine || | |||||
sub_graph_info_ptr->GetEngineName() == kAIcoreEngine) { | |||||
tuning_subgraphs.push_back(sub_graph_tmp); | |||||
} else { | |||||
non_tuning_subgraphs.push_back(sub_graph_tmp); | |||||
} | |||||
} | |||||
} | |||||
return TuningUtils::ConvertGraphToFile(tuning_subgraphs, non_tuning_subgraphs, exe_flag, path); | return TuningUtils::ConvertGraphToFile(tuning_subgraphs, non_tuning_subgraphs, exe_flag, path); | ||||
} | } | ||||
@@ -249,6 +249,7 @@ struct GraphManagerOptions { | |||||
std::string save_original_model; | std::string save_original_model; | ||||
std::string build_mode; | std::string build_mode; | ||||
std::string build_step; | std::string build_step; | ||||
std::string tuning_path; | |||||
std::string input_shape; | std::string input_shape; | ||||
std::string dynamic_dims; | std::string dynamic_dims; | ||||
int32_t dynamic_node_type = -1; | int32_t dynamic_node_type = -1; | ||||
@@ -275,7 +276,8 @@ struct GraphManagerOptions { | |||||
is_single_op(false), | is_single_op(false), | ||||
save_original_model("false"), | save_original_model("false"), | ||||
build_mode(""), | build_mode(""), | ||||
build_step("") {} | |||||
build_step(""), | |||||
tuning_path(""){} | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
@@ -347,14 +347,18 @@ ge::Status VarManager::Init(const uint32_t &version, const uint64_t &session_id, | |||||
const uint64_t &job_id) { | const uint64_t &job_id) { | ||||
std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
GELOGI("VarManager::Init, session id = %lu.", session_id); | GELOGI("VarManager::Init, session id = %lu.", session_id); | ||||
version_ = version; | |||||
device_id_ = device_id; | |||||
session_id_ = session_id; | |||||
job_id_ = job_id; | |||||
var_resource_ = std::unique_ptr<VarResource>(new (std::nothrow) VarResource(session_id_)); | |||||
if (var_resource_ == nullptr) { | if (var_resource_ == nullptr) { | ||||
GELOGW("VarManager has not been init."); | |||||
return ge::INTERNAL_ERROR; | |||||
version_ = version; | |||||
device_id_ = device_id; | |||||
session_id_ = session_id; | |||||
job_id_ = job_id; | |||||
var_resource_ = std::unique_ptr<VarResource>(new (std::nothrow) VarResource(session_id_)); | |||||
if (var_resource_ == nullptr) { | |||||
GELOGW("VarManager init failed session id = %lu.", session_id); | |||||
return ge::INTERNAL_ERROR; | |||||
} | |||||
} else { | |||||
GELOGW("VarManager::has been inited, session id = %lu.", session_id); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -555,6 +555,8 @@ void NetOutputPass::AddInOutForNetOutputOp(const ComputeGraphPtr &graph, OpDescP | |||||
return; | return; | ||||
} | } | ||||
ge::GeTensorDesc out_desc = src_node->GetOpDesc()->GetOutputDesc(src_index); | ge::GeTensorDesc out_desc = src_node->GetOpDesc()->GetOutputDesc(src_index); | ||||
out_desc.SetFormat(FORMAT_ND); | |||||
out_desc.SetOriginFormat(FORMAT_ND); | |||||
GE_IF_BOOL_EXEC(net_output_desc->AddInputDesc(out_desc) != SUCCESS, GELOGW("add input desc failed"); return ); | GE_IF_BOOL_EXEC(net_output_desc->AddInputDesc(out_desc) != SUCCESS, GELOGW("add input desc failed"); return ); | ||||
is_input_const.push_back(PassUtils::IsConstant(src_node)); | is_input_const.push_back(PassUtils::IsConstant(src_node)); | ||||
++iter; | ++iter; | ||||
@@ -23,6 +23,7 @@ | |||||
#include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" | #include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" | ||||
#include "common/formats/format_transfers/format_transfer_transpose.h" | #include "common/formats/format_transfers/format_transfer_transpose.h" | ||||
#include "common/formats/utils/formats_trans_utils.h" | #include "common/formats/utils/formats_trans_utils.h" | ||||
#include "common/util/error_manager/error_manager.h" | |||||
#include "common/helper/model_helper.h" | #include "common/helper/model_helper.h" | ||||
#include "common/math/math_util.h" | #include "common/math/math_util.h" | ||||
#include "common/op/ge_op_utils.h" | #include "common/op/ge_op_utils.h" | ||||
@@ -1304,7 +1305,8 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input, | |||||
auto format = desc.GetFormat(); | auto format = desc.GetFormat(); | ||||
auto origin_format = desc.GetOriginFormat(); | auto origin_format = desc.GetOriginFormat(); | ||||
// data maybe internal format [FRACTAL_NZ] at singleop process such as GEMM. | // data maybe internal format [FRACTAL_NZ] at singleop process such as GEMM. | ||||
bool need_check_internal_format = (!IsTansDataOpData(input_node)) && (!options_.is_single_op); | |||||
auto tune_flag = (options_.build_mode == BUILD_MODE_TUNING) && (options_.build_step == BUILD_STEP_AFTER_BUILDER); | |||||
bool need_check_internal_format = (!IsTansDataOpData(input_node)) && (!options_.is_single_op) && (!tune_flag); | |||||
if (need_check_internal_format) { | if (need_check_internal_format) { | ||||
bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format); | bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format); | ||||
if (is_internal) { | if (is_internal) { | ||||
@@ -1346,19 +1348,22 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input, | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
ge::TensorUtils::SetSize(desc, shape_size); | ge::TensorUtils::SetSize(desc, shape_size); | ||||
graphStatus graph_ret = op->UpdateInputDesc(0, desc); | |||||
if (graph_ret != GRAPH_SUCCESS) { | |||||
GELOGE(graph_ret, "UpdateInputDesc fail, graph_ret:%u", graph_ret); | |||||
return graph_ret; | |||||
} | |||||
// Size will be recalculated in the build stage | |||||
ge::TensorUtils::SetSize(desc, 0); | |||||
graph_ret = op->UpdateOutputDesc(0, desc); | |||||
if (graph_ret != GRAPH_SUCCESS) { | |||||
GELOGE(graph_ret, "UpdateOutputDesc fail, graph_ret:%u", graph_ret); | |||||
return graph_ret; | |||||
if (!tune_flag) { | |||||
graphStatus graph_ret = op->UpdateInputDesc(0, desc); | |||||
if (graph_ret != GRAPH_SUCCESS) { | |||||
GELOGE(graph_ret, "UpdateInputDesc fail, graph_ret:%u", graph_ret); | |||||
return graph_ret; | |||||
} | |||||
// Size will be recalculated in the build stage | |||||
ge::TensorUtils::SetSize(desc, 0); | |||||
graph_ret = op->UpdateOutputDesc(0, desc); | |||||
if (graph_ret != GRAPH_SUCCESS) { | |||||
GELOGE(graph_ret, "UpdateOutputDesc fail, graph_ret:%u", graph_ret); | |||||
return graph_ret; | |||||
} | |||||
} else { | |||||
GELOGI("data %s skip update info in tune mode", op->GetName().c_str()); | |||||
} | } | ||||
if (!dynamic_shape_range_vec.empty()) { | if (!dynamic_shape_range_vec.empty()) { | ||||
ret = UpdateDynamicInputShapeRange(index, dynamic_shape_range_vec, op, desc); | ret = UpdateDynamicInputShapeRange(index, dynamic_shape_range_vec, op, desc); | ||||
GE_CHK_STATUS_RET(ret, "Fail to update dynamic input shape range on %s.", op->GetName().c_str()); | GE_CHK_STATUS_RET(ret, "Fail to update dynamic input shape range on %s.", op->GetName().c_str()); | ||||
@@ -1763,13 +1768,13 @@ Status GraphPrepare::CheckUserInput(const std::vector<GeTensor> &user_input) { | |||||
GeTensorDesc desc(user_input[index].GetTensorDesc()); | GeTensorDesc desc(user_input[index].GetTensorDesc()); | ||||
for (size_t i = 0; i < desc.GetShape().GetDimNum(); ++i) { | for (size_t i = 0; i < desc.GetShape().GetDimNum(); ++i) { | ||||
if (desc.GetShape().GetDim(i) < 0) { | |||||
std::string situation = "data dim[" + std::to_string(i) + "][" + | |||||
std::to_string(desc.GetShape().GetDim(i)) + "]" ; | |||||
std::string reason = "it need >= 0"; | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason}); | |||||
GELOGE(GE_GRAPH_INIT_FAILED, "data dim %zu is not supported, need >= 0, real:%ld.", i, | |||||
desc.GetShape().GetDim(i)); | |||||
int64_t dim = desc.GetShape().GetDim(i); | |||||
if (dim < UNKNOWN_DIM_NUM) { | |||||
std::string situation = "data dim[" + std::to_string(i) + "][" + std::to_string(dim) + "]" ; | |||||
std::string reason = "it need >= -2"; | |||||
REPORT_INPUT_ERROR( | |||||
"E19025", std::vector<std::string>({"situation", "reason"}),std::vector<std::string>({situation, reason})); | |||||
GELOGE(GE_GRAPH_INIT_FAILED, "[Check][InputDim]data dim %zu is not supported, need >= -2, real:%ld.", i, dim); | |||||
return GE_GRAPH_INIT_FAILED; | return GE_GRAPH_INIT_FAILED; | ||||
} | } | ||||
} | } | ||||
@@ -33,7 +33,7 @@ const int kNumOne = 1; | |||||
} // namespace | } // namespace | ||||
Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGeTensorPtr> &input, | Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGeTensorPtr> &input, | ||||
vector<GeTensorPtr> &v_output) { | vector<GeTensorPtr> &v_output) { | ||||
GELOGI("ConcatOffsetKernel in."); | |||||
GELOGD("ConcatOffsetKernel in"); | |||||
if (op_desc_ptr == nullptr) { | if (op_desc_ptr == nullptr) { | ||||
GELOGE(PARAM_INVALID, "input opdesc is nullptr."); | GELOGE(PARAM_INVALID, "input opdesc is nullptr."); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
@@ -41,7 +41,7 @@ Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<Con | |||||
// validate attrs | // validate attrs | ||||
int N = 0; | int N = 0; | ||||
if (!(AttrUtils::GetInt(op_desc_ptr, "N", N))) { | if (!(AttrUtils::GetInt(op_desc_ptr, "N", N))) { | ||||
GELOGW("Attr %s does not exist.", "N"); | |||||
GELOGW("Attr %s does not exist", "N"); | |||||
return NOT_CHANGED; | return NOT_CHANGED; | ||||
} | } | ||||
// follow IR def, the first input is concat_dim | // follow IR def, the first input is concat_dim | ||||
@@ -50,7 +50,7 @@ Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<Con | |||||
int32_t concat_dim = *(const_cast<int32_t *>(reinterpret_cast<const int32_t *>(input_0->GetData().data()))); | int32_t concat_dim = *(const_cast<int32_t *>(reinterpret_cast<const int32_t *>(input_0->GetData().data()))); | ||||
// validate inputs | // validate inputs | ||||
if ((static_cast<int>(input.size()) != (N + kNumOne)) || (input.size() <= kConcatOffsetInputIndexOne)) { | if ((static_cast<int>(input.size()) != (N + kNumOne)) || (input.size() <= kConcatOffsetInputIndexOne)) { | ||||
GELOGW("The number of input for concat offset must be equal to %d, and must be more than one.", (N + kNumOne)); | |||||
GELOGW("The number of input for concat offset must be equal to %d, and must be more than one", (N + kNumOne)); | |||||
return NOT_CHANGED; | return NOT_CHANGED; | ||||
} | } | ||||
@@ -61,7 +61,7 @@ Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<Con | |||||
GELOGW("Concat dim is bigger than the size of output_shape."); | GELOGW("Concat dim is bigger than the size of output_shape."); | ||||
return NOT_CHANGED; | return NOT_CHANGED; | ||||
} | } | ||||
GELOGI("Output shape size is %ld", output_size); | |||||
GELOGI("Output shape size is %ld.", output_size); | |||||
int32_t offset = 0; | int32_t offset = 0; | ||||
if (output_size < 0) { | if (output_size < 0) { | ||||
GELOGE(FAILED, "Index is negative."); | GELOGE(FAILED, "Index is negative."); | ||||
@@ -86,7 +86,7 @@ Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<Con | |||||
output_ptr->MutableTensorDesc().SetShape(output_shape); | output_ptr->MutableTensorDesc().SetShape(output_shape); | ||||
GE_IF_BOOL_EXEC(output_ptr->SetData(reinterpret_cast<uint8_t *>(buf.get()), | GE_IF_BOOL_EXEC(output_ptr->SetData(reinterpret_cast<uint8_t *>(buf.get()), | ||||
static_cast<size_t>(sizeof(DT_INT32) * output_size)) != GRAPH_SUCCESS, | static_cast<size_t>(sizeof(DT_INT32) * output_size)) != GRAPH_SUCCESS, | ||||
GELOGW("set data failed"); | |||||
GELOGW("set data failed."); | |||||
return NOT_CHANGED); | return NOT_CHANGED); | ||||
v_output.push_back(output_ptr); | v_output.push_back(output_ptr); | ||||
// caculate offset | // caculate offset | ||||
@@ -99,7 +99,7 @@ Status ConcatOffsetKernel::Compute(const OpDescPtr op_desc_ptr, const vector<Con | |||||
} | } | ||||
offset += input_dim; | offset += input_dim; | ||||
} | } | ||||
GELOGI("ConcatOffsetKernel success."); | |||||
GELOGD("ConcatOffsetKernel success"); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
REGISTER_KERNEL(CONCATOFFSET, ConcatOffsetKernel); | REGISTER_KERNEL(CONCATOFFSET, ConcatOffsetKernel); | ||||
@@ -278,7 +278,7 @@ Status GatherV2Kernel::SaveIndicesByDataType(ConstGeTensorPtr indices_tensor_ptr | |||||
auto indices_ptr = const_cast<int32_t *>(reinterpret_cast<const int32_t *>(indices_tensor_ptr->GetData().data())); | auto indices_ptr = const_cast<int32_t *>(reinterpret_cast<const int32_t *>(indices_tensor_ptr->GetData().data())); | ||||
for (int64_t i = 0; i < indices_shape.GetShapeSize(); i++) { | for (int64_t i = 0; i < indices_shape.GetShapeSize(); i++) { | ||||
if (*(indices_ptr + i) < 0 || *(indices_ptr + i) >= x_shape.GetDim(axis)) { | if (*(indices_ptr + i) < 0 || *(indices_ptr + i) >= x_shape.GetDim(axis)) { | ||||
GELOGW("indices %ld value is not in range [0, %ld)", i, x_shape.GetDim(axis)); | |||||
GELOGW("indices %ld value is not in range [0, %ld).", i, x_shape.GetDim(axis)); | |||||
return NOT_CHANGED; | return NOT_CHANGED; | ||||
} | } | ||||
indicates_.push_back(*(indices_ptr + i)); | indicates_.push_back(*(indices_ptr + i)); | ||||
@@ -288,7 +288,7 @@ Status GatherV2Kernel::SaveIndicesByDataType(ConstGeTensorPtr indices_tensor_ptr | |||||
auto indices_ptr = const_cast<int64_t *>(reinterpret_cast<const int64_t *>(indices_tensor_ptr->GetData().data())); | auto indices_ptr = const_cast<int64_t *>(reinterpret_cast<const int64_t *>(indices_tensor_ptr->GetData().data())); | ||||
for (int64_t i = 0; i < indices_shape.GetShapeSize(); i++) { | for (int64_t i = 0; i < indices_shape.GetShapeSize(); i++) { | ||||
if (*(indices_ptr + i) < 0 || *(indices_ptr + i) >= x_shape.GetDim(axis)) { | if (*(indices_ptr + i) < 0 || *(indices_ptr + i) >= x_shape.GetDim(axis)) { | ||||
GELOGW("indices %ld value is not in range [0, %ld)", i, x_shape.GetDim(axis)); | |||||
GELOGW("indices %ld value is not in range [0, %ld).", i, x_shape.GetDim(axis)); | |||||
return NOT_CHANGED; | return NOT_CHANGED; | ||||
} | } | ||||
indicates_.push_back(*(indices_ptr + i)); | indicates_.push_back(*(indices_ptr + i)); | ||||
@@ -344,42 +344,42 @@ Status GatherV2Kernel::Check(const OpDescPtr &op_desc_ptr, const vector<ConstGeT | |||||
auto indices_data_type = tensor1->GetTensorDesc().GetDataType(); | auto indices_data_type = tensor1->GetTensorDesc().GetDataType(); | ||||
bool is_valid_indices_data_type = indices_data_type == DT_INT32 || indices_data_type == DT_INT64; | bool is_valid_indices_data_type = indices_data_type == DT_INT32 || indices_data_type == DT_INT64; | ||||
if (!is_valid_indices_data_type) { | if (!is_valid_indices_data_type) { | ||||
GELOGW("indices datatype must be DT_INT32 or DT_INT64"); | |||||
GELOGW("indices datatype must be DT_INT32 or DT_INT64."); | |||||
return NOT_CHANGED; | return NOT_CHANGED; | ||||
} | } | ||||
if (indices_shape.GetDimNum() > kMaxIndicatesDims) { | if (indices_shape.GetDimNum() > kMaxIndicatesDims) { | ||||
GELOGW("indices input only support 0 or 1 dims"); | |||||
GELOGW("indices input only support 0 or 1 dims."); | |||||
return NOT_CHANGED; | return NOT_CHANGED; | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
void GatherV2Kernel::DebugPrint(int64_t axis, const GeShape &x_shape, const GeShape &indices_shape, | void GatherV2Kernel::DebugPrint(int64_t axis, const GeShape &x_shape, const GeShape &indices_shape, | ||||
const std::vector<int64_t> &y_shape) { | const std::vector<int64_t> &y_shape) { | ||||
GELOGD("GatherV2Kernel axis:%ld x_shape:%zu indices_shape:%zu y_shape:%zu", axis, x_shape.GetDimNum(), | |||||
GELOGD("GatherV2Kernel axis:%ld x_shape:%zu indices_shape:%zu y_shape:%zu.", axis, x_shape.GetDimNum(), | |||||
indices_shape.GetDimNum(), y_shape.size()); | indices_shape.GetDimNum(), y_shape.size()); | ||||
for (size_t i = 0; i < x_shape.GetDimNum(); i++) { | for (size_t i = 0; i < x_shape.GetDimNum(); i++) { | ||||
GELOGD("GatherV2Kernel x_shape[%zu]: %ld", i, x_shape.GetDim(i)); | |||||
GELOGD("GatherV2Kernel x_shape[%zu]: %ld.", i, x_shape.GetDim(i)); | |||||
} | } | ||||
for (size_t i = 0; i < indices_shape.GetDimNum(); i++) { | for (size_t i = 0; i < indices_shape.GetDimNum(); i++) { | ||||
GELOGD("GatherV2Kernel indices_shape[%zu]: %ld", i, indices_shape.GetDim(i)); | |||||
GELOGD("GatherV2Kernel indices_shape[%zu]: %ld.", i, indices_shape.GetDim(i)); | |||||
} | } | ||||
for (size_t i = 0; i < y_shape.size(); i++) { | for (size_t i = 0; i < y_shape.size(); i++) { | ||||
GELOGD("GatherV2Kernel y_shape[%zu]: %ld", i, y_shape[i]); | |||||
GELOGD("GatherV2Kernel y_shape[%zu]: %ld.", i, y_shape[i]); | |||||
} | } | ||||
for (auto ele : indicates_) { | for (auto ele : indicates_) { | ||||
GELOGD("GatherV2Kernel indices:%ld", ele); | |||||
GELOGD("GatherV2Kernel indices:%ld.", ele); | |||||
} | } | ||||
} | } | ||||
Status GatherV2Kernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGeTensorPtr> &input, | Status GatherV2Kernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGeTensorPtr> &input, | ||||
vector<GeTensorPtr> &v_output) { | vector<GeTensorPtr> &v_output) { | ||||
GELOGI("Enter GatherV2Kernel Process."); | |||||
GELOGI("Enter GatherV2Kernel Process"); | |||||
Status ret = Check(op_desc_ptr, input, v_output); | Status ret = Check(op_desc_ptr, input, v_output); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGW("param check failed."); | |||||
GELOGW("param check failed"); | |||||
return NOT_CHANGED; | return NOT_CHANGED; | ||||
} | } | ||||
GELOGI("GatherV2Kernel[%s] start Process.", op_desc_ptr->GetName().c_str()); | |||||
GELOGI("GatherV2Kernel[%s] start Process", op_desc_ptr->GetName().c_str()); | |||||
ConstGeTensorPtr tensor0 = input.at(kGatherV2InputIndexZero); | ConstGeTensorPtr tensor0 = input.at(kGatherV2InputIndexZero); | ||||
ConstGeTensorPtr tensor1 = input.at(kGatherV2InputIndexOne); | ConstGeTensorPtr tensor1 = input.at(kGatherV2InputIndexOne); | ||||
ConstGeTensorPtr tensor2 = input.at(kGatherV2InputIndexTwo); | ConstGeTensorPtr tensor2 = input.at(kGatherV2InputIndexTwo); | ||||
@@ -394,7 +394,7 @@ Status GatherV2Kernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGe | |||||
axis = axis >= 0 ? axis : axis + x_shape.GetDimNum(); | axis = axis >= 0 ? axis : axis + x_shape.GetDimNum(); | ||||
// check axis value | // check axis value | ||||
if (axis < 0 || (axis + 1) > static_cast<int64_t>(x_shape.GetDimNum())) { | if (axis < 0 || (axis + 1) > static_cast<int64_t>(x_shape.GetDimNum())) { | ||||
GELOGW("axis is invalid"); | |||||
GELOGW("axis is invalid!"); | |||||
return NOT_CHANGED; | return NOT_CHANGED; | ||||
} | } | ||||
auto indices_data_type = tensor1->GetTensorDesc().GetDataType(); | auto indices_data_type = tensor1->GetTensorDesc().GetDataType(); | ||||
@@ -407,7 +407,7 @@ Status GatherV2Kernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGe | |||||
// check input data type | // check input data type | ||||
auto x_data_type = tensor0->GetTensorDesc().GetDataType(); | auto x_data_type = tensor0->GetTensorDesc().GetDataType(); | ||||
if (supported_type.find(x_data_type) == supported_type.end()) { | if (supported_type.find(x_data_type) == supported_type.end()) { | ||||
GELOGI("GatherV2Kernel does not support this Data type:%s", TypeUtils::DataTypeToSerialString(x_data_type).c_str()); | |||||
GELOGI("GatherV2Kernel does not support this Data type:%s.", TypeUtils::DataTypeToSerialString(x_data_type).c_str()); | |||||
return NOT_CHANGED; | return NOT_CHANGED; | ||||
} | } | ||||
// calc output shape | // calc output shape | ||||
@@ -61,4 +61,5 @@ Status IdentityKernel::Compute(const ge::OpDescPtr op_desc, const std::vector<ge | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
REGISTER_KERNEL(IDENTITY, IdentityKernel); | REGISTER_KERNEL(IDENTITY, IdentityKernel); | ||||
REGISTER_KERNEL(PLACEHOLDERWITHDEFAULT, IdentityKernel); | |||||
} // namespace ge | } // namespace ge |
@@ -84,14 +84,14 @@ void GetOriginStrideVec(const std::vector<ge::ConstGeTensorPtr> &input, vector<i | |||||
} // namespace | } // namespace | ||||
Status StridedSliceKernel::Compute(const ge::OpDescPtr attr, const std::vector<ge::ConstGeTensorPtr> &input, | Status StridedSliceKernel::Compute(const ge::OpDescPtr attr, const std::vector<ge::ConstGeTensorPtr> &input, | ||||
vector<ge::GeTensorPtr> &v_output) { | vector<ge::GeTensorPtr> &v_output) { | ||||
GELOGD("StridedSliceKernel in."); | |||||
GELOGD("StridedSliceKernel in"); | |||||
// 1.Check input and attrs | // 1.Check input and attrs | ||||
if (CheckAndGetAttr(attr) != SUCCESS) { | if (CheckAndGetAttr(attr) != SUCCESS) { | ||||
GELOGW("Check and get attrs failed.Ignore kernel."); | |||||
GELOGW("Check and get attrs failed.Ignore kernel"); | |||||
return NOT_CHANGED; | return NOT_CHANGED; | ||||
} | } | ||||
if (CheckInputParam(input) != SUCCESS) { | if (CheckInputParam(input) != SUCCESS) { | ||||
GELOGW("Check input params failed.Ignore kernel."); | |||||
GELOGW("Check input params failed.Ignore kernel"); | |||||
return NOT_CHANGED; | return NOT_CHANGED; | ||||
} | } | ||||
// 2.Init param with mask attrs. | // 2.Init param with mask attrs. | ||||
@@ -120,7 +120,7 @@ Status StridedSliceKernel::Compute(const ge::OpDescPtr attr, const std::vector<g | |||||
auto ret = OpUtils::SetOutputSliceData(data, static_cast<int64_t>(data_size), data_type, input_dims, begin_vec, | auto ret = OpUtils::SetOutputSliceData(data, static_cast<int64_t>(data_size), data_type, input_dims, begin_vec, | ||||
output_dims, output_ptr.get(), stride_vec); | output_dims, output_ptr.get(), stride_vec); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(INTERNAL_ERROR, "SetOutputSliceData failed."); | |||||
GELOGE(INTERNAL_ERROR, "SetOutputSliceData failed"); | |||||
return NOT_CHANGED; | return NOT_CHANGED; | ||||
} | } | ||||
@@ -133,7 +133,7 @@ Status StridedSliceKernel::Compute(const ge::OpDescPtr attr, const std::vector<g | |||||
GetOutputDims(final_dim_size, output_dims, v_dims); | GetOutputDims(final_dim_size, output_dims, v_dims); | ||||
t_d.SetShape(GeShape(v_dims)); | t_d.SetShape(GeShape(v_dims)); | ||||
v_output.push_back(output_ptr); | v_output.push_back(output_ptr); | ||||
GELOGI("StridedSliceKernel success."); | |||||
GELOGI("StridedSliceKernel success"); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status StridedSliceKernel::CheckAndGetAttr(const OpDescPtr &attr) { | Status StridedSliceKernel::CheckAndGetAttr(const OpDescPtr &attr) { | ||||
@@ -144,7 +144,7 @@ Status StridedSliceKernel::CheckAndGetAttr(const OpDescPtr &attr) { | |||||
// Get all op attr value of strided_slice | // Get all op attr value of strided_slice | ||||
for (auto &attr_2_value : attr_value_map_) { | for (auto &attr_2_value : attr_value_map_) { | ||||
if (!AttrUtils::GetInt(attr, attr_2_value.first, attr_2_value.second)) { | if (!AttrUtils::GetInt(attr, attr_2_value.first, attr_2_value.second)) { | ||||
GELOGE(PARAM_INVALID, "Get %s attr failed.", attr_2_value.first.c_str()); | |||||
GELOGE(PARAM_INVALID, "Get %s attr failed", attr_2_value.first.c_str()); | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
} | } | ||||
@@ -182,7 +182,7 @@ Status StridedSliceKernel::CheckInputParam(const std::vector<ConstGeTensorPtr> & | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
if (kIndexNumberType.find(begin_tensor_desc.GetDataType()) == kIndexNumberType.end()) { | if (kIndexNumberType.find(begin_tensor_desc.GetDataType()) == kIndexNumberType.end()) { | ||||
GELOGW("Data type of StridedSlice OP(begin,end,strides) must be int32 or int64."); | |||||
GELOGW("Data type of StridedSlice OP(begin,end,strides) must be int32 or int64"); | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
@@ -250,7 +250,7 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector<ConstGeTensorPtr | |||||
end_i = x_dims.at(i); | end_i = x_dims.at(i); | ||||
stride_i = 1; | stride_i = 1; | ||||
} | } | ||||
GELOGD("Before mask calculate. Begin is : %ld\t,end is : %ld\t stride is : %ld\t x_dim_i is : %ld.", | |||||
GELOGD("Before mask calculate. Begin is : %ld\t,end is : %ld\t stride is : %ld\t x_dim_i is : %ld", | |||||
begin_i, end_i, stride_i, x_dims.at(i)); | begin_i, end_i, stride_i, x_dims.at(i)); | ||||
auto ret = MaskCal(i, begin_i, end_i, x_dims.at(i)); | auto ret = MaskCal(i, begin_i, end_i, x_dims.at(i)); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
@@ -258,7 +258,7 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector<ConstGeTensorPtr | |||||
return NOT_CHANGED; | return NOT_CHANGED; | ||||
} | } | ||||
int64_t dim_final; | int64_t dim_final; | ||||
GELOGD("Before stride calculate. Begin is : %ld\t,end is : %ld\t stride is : %ld\t x_dim_i is : %ld.", | |||||
GELOGD("Before stride calculate. Begin is : %ld\t,end is : %ld\t stride is : %ld\t x_dim_i is : %ld", | |||||
begin_i, end_i, stride_i, x_dims.at(i)); | begin_i, end_i, stride_i, x_dims.at(i)); | ||||
(void) StrideCal(x_dims.at(i), begin_i, end_i, stride_i, dim_final); | (void) StrideCal(x_dims.at(i), begin_i, end_i, stride_i, dim_final); | ||||
output_dims.push_back(dim_final); | output_dims.push_back(dim_final); | ||||
@@ -29,7 +29,7 @@ const size_t kMinimumPiplineStages = 2; | |||||
const int kDefaultLoopCount = 10; | const int kDefaultLoopCount = 10; | ||||
} | } | ||||
HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model) | HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model) | ||||
: model_(model), run_flag_(false) { | |||||
: model_(model), run_flag_(false), data_dumper_(nullptr) { | |||||
} | } | ||||
HybridModelAsyncExecutor::~HybridModelAsyncExecutor() { | HybridModelAsyncExecutor::~HybridModelAsyncExecutor() { | ||||
@@ -444,31 +444,20 @@ Status HybridModelAsyncExecutor::Execute(const std::vector<DataBuffer> &inputs, | |||||
TensorValue tensor_value(inputs[i].data, inputs[i].length); | TensorValue tensor_value(inputs[i].data, inputs[i].length); | ||||
args.inputs[i] = tensor_value; | args.inputs[i] = tensor_value; | ||||
} | } | ||||
for (size_t i = 0; i < outputs.size(); ++i) { | |||||
args.outputs.emplace_back(TensorValue(outputs[i].data, outputs[i].length)); | |||||
} | |||||
// usr must designate input tensorDesc when input shape is dynamic in inference | |||||
for (size_t i = 0; i < input_desc.size(); ++i) { | |||||
ConstGeTensorDescPtr tensor_desc_ptr = MakeShared<GeTensorDesc>(input_desc[i]); | |||||
args.input_desc.emplace_back(tensor_desc_ptr); | |||||
} | |||||
GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model."); | GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model."); | ||||
for (const auto &output_tensor_desc : args.output_desc) { | for (const auto &output_tensor_desc : args.output_desc) { | ||||
output_desc.emplace_back(*output_tensor_desc); | output_desc.emplace_back(*output_tensor_desc); | ||||
} | } | ||||
for (size_t i = 0; i < args.outputs.size(); ++i) { | |||||
int64_t output_real_size = 0; | |||||
ge::graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(output_desc[i], output_real_size); | |||||
if (graph_status != GRAPH_SUCCESS) { | |||||
GELOGE(FAILED, "Get tensor size in bytes failed."); | |||||
return FAILED; | |||||
} | |||||
if (output_real_size > 0) { | |||||
if (outputs[i].length < static_cast<uint64_t>(output_real_size)) { | |||||
GELOGE(FAILED, "output idx[%zu], the memory size of output[%lu] given by " | |||||
"user should be greater than or equal to the real size of output[%ld]", | |||||
i, outputs[i].length, output_real_size); | |||||
return FAILED; | |||||
} | |||||
GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, args.outputs[i].GetData(), output_real_size, | |||||
RT_MEMCPY_DEVICE_TO_DEVICE)); | |||||
} | |||||
outputs[i].length = output_real_size; | |||||
} | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -44,6 +44,27 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item( | |||||
} | } | ||||
} | } | ||||
Status ShapeInferenceState::CheckInputShapeByShapeRange(const GeTensorDesc &tensor_desc, | |||||
const GeTensorDesc &target_tensor_desc) const { | |||||
std::vector<std::pair<int64_t, int64_t>> shape_range; | |||||
if (tensor_desc.GetShapeRange(shape_range) != SUCCESS) { | |||||
GELOGE(PARAM_INVALID, "Get shape range failed."); | |||||
return PARAM_INVALID; | |||||
} | |||||
if (shape_range.empty()) { | |||||
GELOGD("Shape range is empty, no need to check input shape."); | |||||
return SUCCESS; | |||||
} | |||||
GeShape target_shape = target_tensor_desc.GetShape(); | |||||
if (TensorUtils::CheckShapeByShapeRange(target_shape, shape_range) != SUCCESS) { | |||||
GELOGE(PARAM_INVALID, "Check shape by shape range failed."); | |||||
return PARAM_INVALID; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target) { | Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target) { | ||||
if (node_item.IsInputShapeStatic(idx)) { | if (node_item.IsInputShapeStatic(idx)) { | ||||
GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]", | GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]", | ||||
@@ -54,19 +75,27 @@ Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
std::lock_guard<std::mutex> lk(mu_); | |||||
auto &input_desc = input_tensor_desc[idx]; | |||||
GeShape shape = target.GetShape(); | |||||
input_desc.SetShape(shape); | |||||
input_desc.SetOriginShape(target.GetOriginShape()); | |||||
int64_t tensor_size = -1; | int64_t tensor_size = -1; | ||||
(void) TensorUtils::GetSize(target, tensor_size); | (void) TensorUtils::GetSize(target, tensor_size); | ||||
if (tensor_size <= 0) { | |||||
Format format = input_desc.GetFormat(); | |||||
DataType data_type = input_desc.GetDataType(); | |||||
if (TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size) != GRAPH_SUCCESS) { | |||||
GELOGE(FAILED, "[%s] Calculate tensor memory size failed.", node_item.NodeName().c_str()); | |||||
return FAILED; | |||||
} | |||||
} | |||||
GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s], size = %ld", | GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s], size = %ld", | ||||
node_item.NodeName().c_str(), | node_item.NodeName().c_str(), | ||||
idx, | idx, | ||||
target.GetShape().ToString().c_str(), | |||||
shape.ToString().c_str(), | |||||
target.GetOriginShape().ToString().c_str(), | target.GetOriginShape().ToString().c_str(), | ||||
tensor_size); | tensor_size); | ||||
std::lock_guard<std::mutex> lk(mu_); | |||||
auto &input_desc = input_tensor_desc[idx]; | |||||
input_desc.SetShape(target.GetShape()); | |||||
input_desc.SetOriginShape(target.GetOriginShape()); | |||||
(void) TensorUtils::SetSize(input_desc, tensor_size); | (void) TensorUtils::SetSize(input_desc, tensor_size); | ||||
if (--num_pending_shapes_ <= 0) { | if (--num_pending_shapes_ <= 0) { | ||||
ready_cv_.notify_all(); | ready_cv_.notify_all(); | ||||
@@ -58,6 +58,8 @@ struct ShapeInferenceState { | |||||
const vector<GeTensorDesc> &GetOutputTensorDesc() const; | const vector<GeTensorDesc> &GetOutputTensorDesc() const; | ||||
Status CheckInputShapeByShapeRange(const GeTensorDesc &tensor_desc, const GeTensorDesc &target_tensor_desc) const; | |||||
const NodeItem &node_item; | const NodeItem &node_item; | ||||
private: | private: | ||||
@@ -41,7 +41,7 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { | |||||
// Wait for "const input nodes" if node's shape inference function requires any. | // Wait for "const input nodes" if node's shape inference function requires any. | ||||
// Even if output shape is static, there are cases that the const-input will be used in OpTiling and Execution | // Even if output shape is static, there are cases that the const-input will be used in OpTiling and Execution | ||||
GE_CHK_STATUS_RET_NOLOG(AwaitDependentNodes(node_state)); | GE_CHK_STATUS_RET_NOLOG(AwaitDependentNodes(node_state)); | ||||
if (node_item.is_output_shape_static) { | |||||
if (node_item.is_output_shape_static && !node_item.is_need_force_infershape) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -225,23 +225,19 @@ Status HybridModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, st | |||||
GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); | GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); | ||||
Format format = op_desc->GetInputDescPtr(0)->GetFormat(); | Format format = op_desc->GetInputDescPtr(0)->GetFormat(); | ||||
input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); | |||||
DataType data_type = op_desc->GetInputDescPtr(0)->GetDataType(); | |||||
input.data_type = static_cast<uint32_t>(data_type); | |||||
input.name = op_desc->GetName(); | input.name = op_desc->GetName(); | ||||
int64_t input_size = 0; | |||||
GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); | |||||
// support dynamic shape | |||||
if (input_size < 0) { | |||||
GELOGD("dynamic shape scene, input size is unknown. " | |||||
"format=%d, data_type=%d, input_size=%ld", | |||||
format, input.data_type, input_size); | |||||
input_size = kMemSizeUnknownShape; // -1 | |||||
GeShape shape = op_desc->GetInputDescPtr(0)->GetShape(); | |||||
int64_t tensor_size = 0; | |||||
if (TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size) != GRAPH_SUCCESS) { | |||||
GELOGE(FAILED, "Calculate tensor mem size failed."); | |||||
return FAILED; | |||||
} | } | ||||
// not support dynamic shape input for now, so input_size here will be not less than zero. | |||||
input.size = input_size; | |||||
if (tensor_size == kMemSizeUnknownShape) { | |||||
tensor_size = 0; | |||||
} | |||||
input.size = static_cast<uint64_t>(tensor_size); | |||||
CreateInputDimsInfo(op_desc, input); | CreateInputDimsInfo(op_desc, input); | ||||
formats.push_back(format); | formats.push_back(format); | ||||
@@ -284,6 +280,9 @@ void HybridModel::CreateOutput(ConstGeTensorDescPtr &output_desc, | |||||
} | } | ||||
int64_t tensor_size = 0; | int64_t tensor_size = 0; | ||||
(void)TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size); | (void)TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size); | ||||
if (tensor_size == kMemSizeUnknownShape) { | |||||
tensor_size = 0; | |||||
} | |||||
output_desc_info.size = static_cast<uint64_t>(tensor_size); | output_desc_info.size = static_cast<uint64_t>(tensor_size); | ||||
output_desc_info.data_type = output_desc->GetDataType(); | output_desc_info.data_type = output_desc->GetDataType(); | ||||
} | } | ||||
@@ -154,6 +154,7 @@ class HybridModel { | |||||
uint32_t model_id_ = 0; | uint32_t model_id_ = 0; | ||||
uint8_t *var_mem_base_ = nullptr; | uint8_t *var_mem_base_ = nullptr; | ||||
std::unique_ptr<TensorBuffer> weight_buffer_; | std::unique_ptr<TensorBuffer> weight_buffer_; | ||||
std::map<string, std::unique_ptr<TensorBuffer>> weight_buffer_map_; | |||||
RuntimeParam root_runtime_param_; | RuntimeParam root_runtime_param_; | ||||
string om_name_; | string om_name_; | ||||
}; | }; | ||||
@@ -50,6 +50,7 @@ const char *const kProfilingBpNode = "ProfilingBpNode"; | |||||
const char *const kProfilingEndNode = "ProfilingEndNode"; | const char *const kProfilingEndNode = "ProfilingEndNode"; | ||||
const char *const kProfilingArNode = "ProfilingAllReduceNode"; | const char *const kProfilingArNode = "ProfilingAllReduceNode"; | ||||
const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE"; | const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE"; | ||||
const char *const kForceInfershape = "_force_infershape_when_running"; | |||||
Status SetOutputNameAttr(ComputeGraph &graph) { | Status SetOutputNameAttr(ComputeGraph &graph) { | ||||
vector<string> output_names; | vector<string> output_names; | ||||
@@ -171,6 +172,9 @@ Status HybridModelBuilder::ValidateParams() { | |||||
Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_item) { | Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_item) { | ||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
GE_CHK_STATUS_RET(ParseForceInfershapeNodes(node, node_item), | |||||
"[%s] Failed to parse force_infershape node.", | |||||
node_item.NodeName().c_str()); | |||||
vector<string> dependencies = node->GetOpDesc()->GetOpInferDepends(); | vector<string> dependencies = node->GetOpDesc()->GetOpInferDepends(); | ||||
GE_CHK_STATUS_RET(ParseDependentInputNodes(node_item, dependencies), | GE_CHK_STATUS_RET(ParseDependentInputNodes(node_item, dependencies), | ||||
"[%s] Failed to parse node dependencies.", | "[%s] Failed to parse node dependencies.", | ||||
@@ -263,6 +267,17 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status HybridModelBuilder::ParseForceInfershapeNodes(const NodePtr &node, NodeItem &node_item) { | |||||
auto op_desc = node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
// not care result, if no this attr, stand for the op does not need force infershape | |||||
(void)AttrUtils::GetBool(op_desc, kForceInfershape, node_item.is_need_force_infershape); | |||||
GELOGD("node [%s] is need do infershape , flag is %d", | |||||
op_desc->GetName().c_str(), | |||||
node_item.is_need_force_infershape); | |||||
return SUCCESS; | |||||
} | |||||
Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const std::vector<string> &dependencies) { | Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const std::vector<string> &dependencies) { | ||||
std::set<NodePtr> dependent_input_nodes; | std::set<NodePtr> dependent_input_nodes; | ||||
auto &ge_node = node_item.node; | auto &ge_node = node_item.node; | ||||
@@ -997,70 +1012,65 @@ Status HybridModelBuilder::InitVariableTensors() { | |||||
Status HybridModelBuilder::InitWeights() { | Status HybridModelBuilder::InitWeights() { | ||||
// For constant in root graph | // For constant in root graph | ||||
const auto &root_graph = ge_root_model_->GetRootGraph(); | |||||
const auto &subgraph_models = ge_root_model_->GetSubgraphInstanceNameToModel(); | |||||
auto iter = subgraph_models.find(root_graph->GetName()); | |||||
if (iter == subgraph_models.end()) { | |||||
GELOGD("Root graph model not found"); | |||||
return SUCCESS; | |||||
} | |||||
for (const auto &subgraph_model : ge_root_model_->GetSubgraphInstanceNameToModel()) { | |||||
const auto &weight_buffer = subgraph_model.second->GetWeight(); | |||||
if (weight_buffer.GetSize() == 0) { | |||||
GELOGD("weight is empty"); | |||||
return SUCCESS; | |||||
} | |||||
auto &root_model = iter->second; | |||||
const auto &weight_buffer = root_model->GetWeight(); | |||||
if (weight_buffer.GetSize() == 0) { | |||||
GELOGD("weight is empty"); | |||||
return SUCCESS; | |||||
} | |||||
auto allocator = NpuMemoryAllocator::GetAllocator(); | |||||
GE_CHECK_NOTNULL(allocator); | |||||
auto sub_weight_buffer = TensorBuffer::Create(allocator, weight_buffer.size()); | |||||
GE_CHECK_NOTNULL(sub_weight_buffer); | |||||
auto weight_base = reinterpret_cast<uint8_t *>(sub_weight_buffer->GetData()); | |||||
GE_CHK_RT_RET(rtMemcpy(weight_base, | |||||
sub_weight_buffer->GetSize(), | |||||
weight_buffer.GetData(), | |||||
weight_buffer.GetSize(), | |||||
RT_MEMCPY_HOST_TO_DEVICE)); | |||||
GELOGI("Init weight mem successfully, weight base %p, weight size = %zu", | |||||
weight_base, | |||||
sub_weight_buffer->GetSize()); | |||||
auto root_graph = GraphUtils::GetComputeGraph(subgraph_model.second->GetGraph()); | |||||
hybrid_model_.weight_buffer_map_.emplace(root_graph->GetName(),std::move(sub_weight_buffer)); | |||||
for (auto &node : root_graph->GetDirectNode()) { | |||||
if (node->GetType() != CONSTANT) { | |||||
continue; | |||||
} | |||||
auto allocator = NpuMemoryAllocator::GetAllocator(); | |||||
GE_CHECK_NOTNULL(allocator); | |||||
hybrid_model_.weight_buffer_ = TensorBuffer::Create(allocator, weight_buffer.size()); | |||||
GE_CHECK_NOTNULL(hybrid_model_.weight_buffer_); | |||||
auto weight_base = reinterpret_cast<uint8_t *>(hybrid_model_.weight_buffer_->GetData()); | |||||
GE_CHK_RT_RET(rtMemcpy(weight_base, | |||||
hybrid_model_.weight_buffer_->GetSize(), | |||||
weight_buffer.GetData(), | |||||
weight_buffer.GetSize(), | |||||
RT_MEMCPY_HOST_TO_DEVICE)); | |||||
GELOGI("Init weight mem successfully, weight base %p, weight size = %zu", | |||||
weight_base, | |||||
hybrid_model_.weight_buffer_->GetSize()); | |||||
for (auto &node : root_graph->GetDirectNode()) { | |||||
if (node->GetType() != CONSTANT) { | |||||
continue; | |||||
} | |||||
auto op_desc = node->GetOpDesc(); | |||||
auto v_weights = ModelUtils::GetWeights(op_desc); | |||||
if (v_weights.empty()) { | |||||
GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", node->GetName().c_str()); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get()); | |||||
GE_CHECK_NOTNULL(ge_tensor); | |||||
const GeTensorDesc &tensor_desc = ge_tensor->GetTensorDesc(); | |||||
int64_t tensor_size = 0; | |||||
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*op_desc->MutableOutputDesc(0), tensor_size), | |||||
"[%s] Failed to get tensor size", | |||||
node->GetName().c_str()); | |||||
int64_t data_offset = 0; | |||||
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetDataOffset(tensor_desc, data_offset), | |||||
"[%s] Failed to get data offset", | |||||
node->GetName().c_str()); | |||||
GELOGD("[%s] Start to init Constant node [%s], size = %ld, offset = %ld", | |||||
GetGraphName(), | |||||
node->GetName().c_str(), | |||||
tensor_size, | |||||
data_offset); | |||||
auto op_desc = node->GetOpDesc(); | |||||
auto v_weights = ModelUtils::GetWeights(op_desc); | |||||
if (v_weights.empty()) { | |||||
GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", node->GetName().c_str()); | |||||
return INTERNAL_ERROR; | |||||
auto tensor_buffer = TensorBuffer::Create(weight_base + data_offset, tensor_size); | |||||
GE_CHECK_NOTNULL(tensor_buffer); | |||||
std::unique_ptr<TensorValue> constant_tensor(new (std::nothrow)TensorValue(std::move(tensor_buffer))); | |||||
GE_CHECK_NOTNULL(constant_tensor); | |||||
constant_tensor->SetName("Constant_" + op_desc->GetName()); | |||||
hybrid_model_.constant_tensors_.emplace(node, std::move(constant_tensor)); | |||||
GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), node->GetName().c_str(), tensor_size); | |||||
} | } | ||||
auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get()); | |||||
GE_CHECK_NOTNULL(ge_tensor); | |||||
const GeTensorDesc &tensor_desc = ge_tensor->GetTensorDesc(); | |||||
int64_t tensor_size = 0; | |||||
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*op_desc->MutableOutputDesc(0), tensor_size), | |||||
"[%s] Failed to get tensor size", | |||||
node->GetName().c_str()); | |||||
int64_t data_offset = 0; | |||||
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetDataOffset(tensor_desc, data_offset), | |||||
"[%s] Failed to get data offset", | |||||
node->GetName().c_str()); | |||||
GELOGD("[%s] Start to init Constant node [%s], size = %ld, offset = %ld", | |||||
GetGraphName(), | |||||
node->GetName().c_str(), | |||||
tensor_size, | |||||
data_offset); | |||||
auto tensor_buffer = TensorBuffer::Create(weight_base + data_offset, tensor_size); | |||||
GE_CHECK_NOTNULL(tensor_buffer); | |||||
std::unique_ptr<TensorValue> constant_tensor(new (std::nothrow)TensorValue(std::move(tensor_buffer))); | |||||
GE_CHECK_NOTNULL(constant_tensor); | |||||
constant_tensor->SetName("Constant_" + op_desc->GetName()); | |||||
hybrid_model_.constant_tensors_.emplace(node, std::move(constant_tensor)); | |||||
GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), node->GetName().c_str(), tensor_size); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -62,6 +62,7 @@ class HybridModelBuilder { | |||||
Status IdentifySameInputs(NodeItem &node_item); | Status IdentifySameInputs(NodeItem &node_item); | ||||
Status BuildNodeItem(const NodePtr &node, NodeItem &node_item); | Status BuildNodeItem(const NodePtr &node, NodeItem &node_item); | ||||
Status GetOrCreateNodeItem(const NodePtr &node, NodeItem **node_item); | Status GetOrCreateNodeItem(const NodePtr &node, NodeItem **node_item); | ||||
Status ParseForceInfershapeNodes(const NodePtr &node, NodeItem &node_item); | |||||
Status ParseDependentInputNodes(NodeItem &node_item, const std::vector<string> &dependencies); | Status ParseDependentInputNodes(NodeItem &node_item, const std::vector<string> &dependencies); | ||||
Status ParseDependentForFusedSubgraph(NodeItem &node_item); | Status ParseDependentForFusedSubgraph(NodeItem &node_item); | ||||
Status IndexTaskDefs(); | Status IndexTaskDefs(); | ||||
@@ -83,6 +83,7 @@ struct NodeItem { | |||||
bool has_observer = false; | bool has_observer = false; | ||||
bool has_optional_inputs = false; | bool has_optional_inputs = false; | ||||
bool is_output_shape_static = true; | bool is_output_shape_static = true; | ||||
bool is_need_force_infershape = false; | |||||
UnknowShapeOpType shape_inference_type = DEPEND_IN_SHAPE; | UnknowShapeOpType shape_inference_type = DEPEND_IN_SHAPE; | ||||
std::string node_name; | std::string node_name; | ||||
std::string node_type; | std::string node_type; | ||||
@@ -33,6 +33,7 @@ const std::map<std::string, std::vector<uint32_t>> | |||||
{RESHAPE, {}}, | {RESHAPE, {}}, | ||||
{EXPANDDIMS, {}}, | {EXPANDDIMS, {}}, | ||||
{SQUEEZE, {}}, | {SQUEEZE, {}}, | ||||
{UNSQUEEZE, {}}, | |||||
{BROADCASTGRADIENTARGS, {}} | {BROADCASTGRADIENTARGS, {}} | ||||
}; | }; | ||||
@@ -236,7 +236,7 @@ Status TaskContext::AllocateOutput(int index, | |||||
ref_node->GetName().c_str(), | ref_node->GetName().c_str(), | ||||
ref_node->GetType().c_str()); | ref_node->GetType().c_str()); | ||||
TensorValue *ref_tensor = execution_context_->model->GetVariable(ref_node->GetName()); | |||||
TensorValue *ref_tensor = execution_context_->model->GetTensor(ref_node); | |||||
GE_CHECK_NOTNULL(ref_tensor); | GE_CHECK_NOTNULL(ref_tensor); | ||||
outputs_start_[index] = *ref_tensor; | outputs_start_[index] = *ref_tensor; | ||||
} else { | } else { | ||||
@@ -19,7 +19,9 @@ | |||||
#include "framework/common/string_util.h" | #include "framework/common/string_util.h" | ||||
#include "framework/common/types.h" | #include "framework/common/types.h" | ||||
#include "framework/common/util.h" | #include "framework/common/util.h" | ||||
#include "graph/compute_graph.h" | |||||
#include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
#include "graph/utils/tensor_utils.h" | |||||
using std::pair; | using std::pair; | ||||
using std::string; | using std::string; | ||||
@@ -52,6 +54,11 @@ const char *const kCompressWeightError = "it must be appointed when appoint para | |||||
const char *const kSelectImplmodeError = "only support high_performance, high_precision"; | const char *const kSelectImplmodeError = "only support high_performance, high_precision"; | ||||
const char *const kDynamicBatchSizeError = "It can only contains digit, \",\", \" \""; | const char *const kDynamicBatchSizeError = "It can only contains digit, \",\", \" \""; | ||||
const char *const kKeepDtypeError = "file not found"; | const char *const kKeepDtypeError = "file not found"; | ||||
const char *const kInputShapeRangeInvalid = "format of shape range is invalid"; | |||||
const char *const kShapeRangeValueConvertError = "transfer from string to int64 error"; | |||||
const char *const kInputShapeRangeSample1 = "\"input_name1:[n1~n2,c1,h1,w1]\""; | |||||
const char *const kInputShapeRangeSample2 = "\"[]\""; | |||||
const char *const kInputShapeRangeSample3 = "\"[1~20,3,3~6,-1]\""; | |||||
vector<string> SplitInputShape(const std::string &input_shape) { | vector<string> SplitInputShape(const std::string &input_shape) { | ||||
vector<string> shape_pair_vec; | vector<string> shape_pair_vec; | ||||
@@ -257,8 +264,132 @@ bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims | |||||
return true; | return true; | ||||
} | } | ||||
bool StringToLongNoThrow(const string &str, long &val) { | |||||
try { | |||||
val = std::stol(str); | |||||
return true; | |||||
} catch (const std::invalid_argument) { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10048", {"shape_range", "reason", "sample"}, | |||||
{str, kShapeRangeValueConvertError, kInputShapeRangeSample3}); | |||||
GELOGE(PARAM_INVALID, | |||||
"Parse input parameter [--input_shape_range]'s shape range[%s] failed, reason: %s, correct sample is %s.", | |||||
str.c_str(), kShapeRangeValueConvertError, kInputShapeRangeSample3); | |||||
} catch (const std::out_of_range) { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10048", {"shape_range", "reason", "sample"}, | |||||
{str, kShapeRangeValueConvertError, kInputShapeRangeSample3}); | |||||
GELOGE(PARAM_INVALID, | |||||
"Parse input parameter [--input_shape_range]'s shape range[%s] failed, reason: %s, correct sample is %s.", | |||||
str.c_str(), kShapeRangeValueConvertError, kInputShapeRangeSample3); | |||||
} | |||||
return false; | |||||
} | |||||
bool ParseSingleShapeRange(std::string &shape_range, vector<pair<int64_t, int64_t>> &shape_range_vec) { | |||||
vector<char> square_brackets; | |||||
for (auto ch : shape_range) { | |||||
if (ch == '[' || ch == ']') { | |||||
square_brackets.push_back(ch); | |||||
} | |||||
} | |||||
bool is_square_brackets = (square_brackets[0] == '[') && (square_brackets[1] == ']') && (square_brackets.size() == 2); | |||||
if (!is_square_brackets) { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10048", {"shape_range", "reason", "sample"}, | |||||
{shape_range, kInputShapeRangeInvalid, kInputShapeRangeSample2}); | |||||
GELOGE(PARAM_INVALID, | |||||
"Parse input parameter [--input_shape_range]'s shape range[%s] failed, reason: %s, correct sample is %s.", | |||||
shape_range.c_str(), kInputShapeRangeInvalid, kInputShapeRangeSample2); | |||||
return false; | |||||
} | |||||
// trim start bytes, after that, single input should be "1~20,3,3~6,-1" | |||||
if (ge::StringUtils::StartWith(shape_range, "[")) { | |||||
shape_range = shape_range.substr(1, shape_range.size() - 1); | |||||
} | |||||
// parse shape_range of single input. eg. "1~20,3,3~6,-1" | |||||
vector<string> dim_range_set = ge::StringUtils::Split(shape_range, ','); | |||||
for (const auto &range_pair_str : dim_range_set) { | |||||
vector<string> range_pair_set = ge::StringUtils::Split(range_pair_str, '~'); | |||||
pair<int64_t, int64_t> range_pair; | |||||
if (range_pair_set.size() == 1) { | |||||
long range_value = 0; | |||||
if (!StringToLongNoThrow(range_pair_set.at(0), range_value)) { | |||||
return false; | |||||
} | |||||
if (range_value < 0) { | |||||
range_pair = std::make_pair(1, range_value); | |||||
} else { | |||||
range_pair = std::make_pair(range_value, range_value); | |||||
} | |||||
} else if (range_pair_set.size() == 2) { | |||||
// unknown dim, should get range. | |||||
long range_left = 0; | |||||
if (!StringToLongNoThrow(range_pair_set.at(0), range_left)) { | |||||
return false; | |||||
} | |||||
long range_right = 0; | |||||
if (!StringToLongNoThrow(range_pair_set.at(1), range_right)) { | |||||
return false; | |||||
} | |||||
if (range_left < 0 || (range_right < 0)) { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10048", {"shape_range", "reason", "sample"}, | |||||
{shape_range, kInputShapeRangeInvalid, kInputShapeRangeSample3}); | |||||
GELOGE(PARAM_INVALID, | |||||
"Parse input parameter [--input_shape_range]'s shape range[%s] failed, reason: %s, correct sample is %s.", | |||||
shape_range.c_str(), kInputShapeRangeInvalid, kInputShapeRangeSample3); | |||||
return false; | |||||
} | |||||
range_pair = std::make_pair(range_left, range_right); | |||||
} else { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10048", {"shape_range", "reason", "sample"}, | |||||
{shape_range, kInputShapeRangeInvalid, kInputShapeRangeSample3}); | |||||
GELOGE(PARAM_INVALID, | |||||
"Parse input parameter [--input_shape_range]'s shape range[%s] failed, reason: %s, correct sample is %s.", | |||||
shape_range.c_str(), kInputShapeRangeInvalid, kInputShapeRangeSample3); | |||||
return false; | |||||
} | |||||
shape_range_vec.emplace_back(range_pair); | |||||
} | |||||
return true; | |||||
} | |||||
bool ParseInputShapeRange(const std::string &shape_range, | |||||
std::map<string, std::vector<std::pair<int64_t, int64_t>>> &shape_range_map) { | |||||
GELOGD("Input shape range %s", shape_range.c_str()); | |||||
vector<string> shape_range_vec = StringUtils::Split(shape_range, ';'); | |||||
const int DEFAULT_SHAPE_RANGE_PAIR_SIZE = 2; | |||||
for (const auto &shape_range_item : shape_range_vec) { | |||||
vector<string> shape_range_pair_vec = SplitInputShape(shape_range_item); | |||||
if (shape_range_pair_vec.size() != DEFAULT_SHAPE_RANGE_PAIR_SIZE) { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10048", {"shape_range", "reason", "sample"}, | |||||
{shape_range, kSplitError1, kInputShapeRangeSample1}); | |||||
GELOGE(PARAM_INVALID, "Parse input parameter [--input_shape_range]'s shape range[%s] failed, " | |||||
"reason: %s, correct sample is %s.", shape_range.c_str(), kSplitError1, kInputShapeRangeSample1); | |||||
return false; | |||||
} | |||||
if (shape_range_pair_vec[1].empty()) { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10048", {"shape", "reason", "sample"}, | |||||
{shape_range, kEmptyError, kInputShapeRangeSample1}); | |||||
GELOGE(PARAM_INVALID, "Parse input parameter [--input_shape_range]'s shape range[%s] failed," | |||||
"reason: %s, correct sample is %s.", shape_range.c_str(), kEmptyError, kInputShapeRangeSample1); | |||||
return false; | |||||
} | |||||
string shape_range_str = shape_range_pair_vec[1]; | |||||
vector<pair<int64_t, int64_t>> shape_range_val; | |||||
if (!ParseSingleShapeRange(shape_range_str, shape_range_val)) { | |||||
GELOGE(PARAM_INVALID, "Parse single shape range %s error.", shape_range_str.c_str()); | |||||
return false; | |||||
} | |||||
shape_range_map.emplace(make_pair(StringUtils::Trim(shape_range_pair_vec[0]), shape_range_val)); | |||||
} | |||||
return true; | |||||
} | |||||
Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_image_size, string &dynamic_dims, | Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_image_size, string &dynamic_dims, | ||||
const string input_shape, const string input_format, bool &is_dynamic_input) { | |||||
const string input_shape, const string input_shape_range, const string input_format, | |||||
bool &is_dynamic_input) { | |||||
int32_t param_size = static_cast<int32_t>(!dynamic_batch_size.empty()) + | int32_t param_size = static_cast<int32_t>(!dynamic_batch_size.empty()) + | ||||
static_cast<int32_t>(!dynamic_image_size.empty()) + static_cast<int32_t>(!dynamic_dims.empty()); | static_cast<int32_t>(!dynamic_image_size.empty()) + static_cast<int32_t>(!dynamic_dims.empty()); | ||||
if (param_size > 1) { | if (param_size > 1) { | ||||
@@ -269,6 +400,13 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i | |||||
} | } | ||||
if (param_size == 0) { | if (param_size == 0) { | ||||
if (!input_shape_range.empty()) { | |||||
std::map<string, std::vector<std::pair<int64_t, int64_t>>> shape_range_map; | |||||
if(!ParseInputShapeRange(input_shape_range, shape_range_map)) { | |||||
GELOGE(ge::PARAM_INVALID, "Failed to parse input shape range: %s", input_shape_range.c_str()); | |||||
return ge::PARAM_INVALID; | |||||
} | |||||
} | |||||
return ge::SUCCESS; | return ge::SUCCESS; | ||||
} | } | ||||
@@ -546,4 +684,91 @@ void EraseEndSemicolon(string ¶m) { | |||||
param.erase(param.end() - 1); | param.erase(param.end() - 1); | ||||
} | } | ||||
} | } | ||||
Status UpdateDataOpShape(const OpDescPtr &op, map<string, vector<int64_t>> &shape_map) { | |||||
GE_CHECK_NOTNULL(op); | |||||
if (shape_map.empty()) { | |||||
GELOGI("Shape map of data op [%s] is empty, no need to update.", op->GetName().c_str()); | |||||
return SUCCESS; | |||||
} | |||||
auto tensor_input = op->MutableInputDesc(0); | |||||
auto tensor_output = op->MutableOutputDesc(0); | |||||
GE_CHECK_NOTNULL(tensor_input); | |||||
GE_CHECK_NOTNULL(tensor_output); | |||||
string data_op_name = op->GetName(); | |||||
auto iter = shape_map.find(data_op_name); | |||||
if (iter != shape_map.end()) { | |||||
tensor_input->SetShape(ge::GeShape(iter->second)); | |||||
tensor_output->SetShape(ge::GeShape(iter->second)); | |||||
GELOGI("Update input [%s] shape info", data_op_name.c_str()); | |||||
} else { | |||||
GELOGI("No need update input [%s] attr because not found from input_shape.", data_op_name.c_str()); | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status UpdateDataOpShapeRange(const OpDescPtr &op, | |||||
map<string, vector<pair<int64_t, int64_t>>> &shape_range_map) { | |||||
GE_CHECK_NOTNULL(op); | |||||
if (shape_range_map.empty()) { | |||||
GELOGI("Shape range map of data op [%s] is empty.", op->GetName().c_str()); | |||||
return SUCCESS; | |||||
} | |||||
auto tensor_input = op->MutableInputDesc(0); | |||||
GE_CHECK_NOTNULL(tensor_input); | |||||
string data_op_name = op->GetName(); | |||||
auto origin_shape = tensor_input->GetShape(); | |||||
auto iter = shape_range_map.find(data_op_name); | |||||
if (iter != shape_range_map.end()) { | |||||
auto cur_shape_range = iter->second; | |||||
if (TensorUtils::CheckShapeByShapeRange(origin_shape, cur_shape_range) != SUCCESS) { | |||||
GELOGE(PARAM_INVALID, "[%s] Check shape by shape range failed.", op->GetName().c_str()); | |||||
return PARAM_INVALID; | |||||
} | |||||
for (size_t idx = 0; idx < cur_shape_range.size(); idx++) { | |||||
auto left_range = cur_shape_range[idx].first; | |||||
auto right_range = cur_shape_range[idx].second; | |||||
if (left_range != right_range) { | |||||
origin_shape.SetDim(idx, UNKNOWN_DIM); | |||||
} | |||||
} | |||||
tensor_input->SetShape(origin_shape); | |||||
tensor_input->SetShapeRange(cur_shape_range); | |||||
GELOGI("Update input [%s] shape range info", data_op_name.c_str()); | |||||
} else { | |||||
GELOGI("No need to update input [%s] attr because not found from input_shape_range.", data_op_name.c_str()); | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status UpdateDynamicInputShapeRange(const ge::ComputeGraphPtr &compute_graph, const string &input_shape_range) { | |||||
if (input_shape_range.empty()) { | |||||
return SUCCESS; | |||||
} | |||||
GE_CHECK_NOTNULL(compute_graph); | |||||
map<string, vector<pair<int64_t, int64_t>>> shape_range_map; | |||||
if (!ParseInputShapeRange(input_shape_range, shape_range_map)) { | |||||
GELOGE(PARAM_INVALID, "Parse input shape range failed."); | |||||
return PARAM_INVALID; | |||||
} | |||||
for (NodePtr &input_node : compute_graph->GetDirectNode()) { | |||||
GE_CHECK_NOTNULL(input_node); | |||||
OpDescPtr op = input_node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op); | |||||
if (op->GetType() == DATA) { | |||||
if (UpdateDataOpShapeRange(op, shape_range_map) != SUCCESS) { | |||||
GELOGE(FAILED, "Update data op [%s] input shape range failed.", op->GetName().c_str()); | |||||
return FAILED; | |||||
} | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
} // namespace ge | } // namespace ge |
@@ -31,7 +31,7 @@ | |||||
namespace ge { | namespace ge { | ||||
static std::set<std::string> caffe_support_input_format = {"NCHW", "ND"}; | static std::set<std::string> caffe_support_input_format = {"NCHW", "ND"}; | ||||
static std::set<std::string> tf_support_input_format = {"NCHW", "NHWC", "ND", "NCDHW", "NDHWC"}; | static std::set<std::string> tf_support_input_format = {"NCHW", "NHWC", "ND", "NCDHW", "NDHWC"}; | ||||
static std::set<std::string> onnx_support_input_format = {"NCHW", "ND"}; | |||||
static std::set<std::string> onnx_support_input_format = {"NCHW", "ND", "NCDHW"}; | |||||
static std::map<std::string, domiTensorFormat_t> input_format_str_to_geformat = { | static std::map<std::string, domiTensorFormat_t> input_format_str_to_geformat = { | ||||
{"ND", domi::DOMI_TENSOR_ND}, | {"ND", domi::DOMI_TENSOR_ND}, | ||||
@@ -59,10 +59,13 @@ bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims | |||||
Status CheckDynamicInputParamValid(std::string &dynamic_batch_size, std::string &dynamic_image_size, | Status CheckDynamicInputParamValid(std::string &dynamic_batch_size, std::string &dynamic_image_size, | ||||
std::string &dynamic_dims, const std::string input_shape, | std::string &dynamic_dims, const std::string input_shape, | ||||
const std::string input_format, bool &is_dynamic_input); | |||||
const std::string input_shape_range, const std::string input_format, | |||||
bool &is_dynamic_input); | |||||
bool ParseInputShape(const std::string &input_shape, std::map<string, std::vector<int64_t>> &shape_map, | bool ParseInputShape(const std::string &input_shape, std::map<string, std::vector<int64_t>> &shape_map, | ||||
std::vector<std::pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input = false); | std::vector<std::pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input = false); | ||||
bool ParseInputShapeRange(const std::string &shape_range, | |||||
std::map<string, std::vector<std::pair<int64_t, int64_t>>> &shape_range_map); | |||||
Status CheckOutputTypeParamValid(const std::string output_type); | Status CheckOutputTypeParamValid(const std::string output_type); | ||||
Status CheckBufferOptimizeParamValid(const std::string buffer_optimize); | Status CheckBufferOptimizeParamValid(const std::string buffer_optimize); | ||||
@@ -76,5 +79,9 @@ Status CheckInputFormat(const string &input_format); | |||||
Status CheckKeepTypeParamValid(const std::string &keep_dtype); | Status CheckKeepTypeParamValid(const std::string &keep_dtype); | ||||
void PrintOptionMap(std::map<std::string, std::string> &options, std::string tips); | void PrintOptionMap(std::map<std::string, std::string> &options, std::string tips); | ||||
void EraseEndSemicolon(std::string ¶m); | void EraseEndSemicolon(std::string ¶m); | ||||
Status UpdateDataOpShape(const OpDescPtr &op, std::map<std::string, std::vector<int64_t>> &shape_map); | |||||
Status UpdateDataOpShapeRange(const OpDescPtr &op, | |||||
std::map<std::string, std::vector<std::pair<int64_t, int64_t>>> &shape_range_map); | |||||
Status UpdateDynamicInputShapeRange(const ge::ComputeGraphPtr &compute_graph, const string &input_shape_range); | |||||
} | } | ||||
#endif // FRAMEWORK_DOMI_ATC_IR_COMMON_H_ | #endif // FRAMEWORK_DOMI_ATC_IR_COMMON_H_ |
@@ -55,6 +55,7 @@ const std::string IR_OPTION_DISABLE_REUSE_MEMORY_DEFAULT = "0"; | |||||
const std::string IR_OPTION_ENABLE_COMPRESS_WEIGHT_DEFAULT = "false"; | const std::string IR_OPTION_ENABLE_COMPRESS_WEIGHT_DEFAULT = "false"; | ||||
const std::string KEEP_DTYPE_OPTION = "keep_dtype"; | const std::string KEEP_DTYPE_OPTION = "keep_dtype"; | ||||
const std::string kInputShape = "input_shape"; | const std::string kInputShape = "input_shape"; | ||||
const std::string kInputShapeRange = "input_shape_range"; | |||||
const std::string kInputFormat = "input_format"; | const std::string kInputFormat = "input_format"; | ||||
/** | /** | ||||
@@ -289,13 +290,20 @@ graphStatus Impl::InferShapePrepare(const ComputeGraphPtr &compute_graph) { | |||||
graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { | graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { | ||||
GELOGD("Enter Update Data Attr Process!"); | GELOGD("Enter Update Data Attr Process!"); | ||||
if (options_.find(kInputShape) == options_.end()) { | |||||
return GRAPH_SUCCESS; | |||||
} | |||||
std::string input_shape = (options_.find(kInputShape) == options_.end()) ? "" : options_[kInputShape]; | |||||
std::string input_shape_range = (options_.find(kInputShapeRange) == options_.end()) ? "" : options_[kInputShapeRange]; | |||||
map<string, vector<int64_t>> shape_map; | map<string, vector<int64_t>> shape_map; | ||||
vector<pair<string, vector<int64_t>>> user_shape_map; | vector<pair<string, vector<int64_t>>> user_shape_map; | ||||
GE_CHK_BOOL_EXEC(ParseInputShape(options_[kInputShape], shape_map, user_shape_map, true), | |||||
return GRAPH_PARAM_INVALID, "parse input shape failed!"); | |||||
if (!input_shape.empty()) { | |||||
GE_CHK_BOOL_EXEC(ParseInputShape(input_shape, shape_map, user_shape_map, true), | |||||
return GRAPH_PARAM_INVALID, "Parse input shape failed!"); | |||||
} | |||||
std::map<string, std::vector<std::pair<int64_t, int64_t>>> shape_range_map; | |||||
if (!input_shape_range.empty()) { | |||||
GE_CHK_BOOL_EXEC(ParseInputShapeRange(input_shape_range, shape_range_map), | |||||
return GRAPH_PARAM_INVALID, "Parse input shape range failed."); | |||||
} | |||||
auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); | auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); | ||||
GE_CHECK_NOTNULL(compute_graph); | GE_CHECK_NOTNULL(compute_graph); | ||||
for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { | for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { | ||||
@@ -303,21 +311,17 @@ graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { | |||||
ge::OpDescPtr op = input_node->GetOpDesc(); | ge::OpDescPtr op = input_node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op); | GE_CHECK_NOTNULL(op); | ||||
if (op->GetType() == DATA) { | if (op->GetType() == DATA) { | ||||
auto tensor_input = op->MutableInputDesc(0); | |||||
auto tensor_output = op->MutableOutputDesc(0); | |||||
GE_CHECK_NOTNULL(tensor_input); | |||||
GE_CHECK_NOTNULL(tensor_output); | |||||
string data_op_name = op->GetName(); | |||||
auto iter = shape_map.find(data_op_name); | |||||
if (iter != shape_map.end()) { | |||||
tensor_input->SetShape(ge::GeShape(iter->second)); | |||||
tensor_output->SetShape(ge::GeShape(iter->second)); | |||||
GELOGD("update input [%s] shape info", data_op_name.c_str()); | |||||
} else { | |||||
GELOGI("no need update input [%s] attr because not found from input_shape.", data_op_name.c_str()); | |||||
if (UpdateDataOpShape(op, shape_map) != SUCCESS) { | |||||
GELOGE(GRAPH_FAILED, "Update data op [%s] shape failed.", op->GetName().c_str()); | |||||
return GRAPH_FAILED; | |||||
} | |||||
if (UpdateDataOpShapeRange(op, shape_range_map) != SUCCESS) { | |||||
GELOGE(GRAPH_FAILED, "Update data op [%s] shape range failed.", op->GetName().c_str()); | |||||
return GRAPH_FAILED; | |||||
} | } | ||||
} | } | ||||
} | } | ||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
} | } | ||||
@@ -400,9 +404,11 @@ graphStatus Impl::Init(const Graph &graph, const std::map<std::string, std::stri | |||||
: options_[ge::ir_option::DYNAMIC_IMAGE_SIZE]; | : options_[ge::ir_option::DYNAMIC_IMAGE_SIZE]; | ||||
string dynamic_dims = | string dynamic_dims = | ||||
options_.find(ge::ir_option::DYNAMIC_DIMS) == options_.end() ? "" : options_[ge::ir_option::DYNAMIC_DIMS]; | options_.find(ge::ir_option::DYNAMIC_DIMS) == options_.end() ? "" : options_[ge::ir_option::DYNAMIC_DIMS]; | ||||
string input_shape_range = | |||||
options_.find(ge::INPUT_SHAPE_RANGE) == options_.end() ? "" : options_[ge::INPUT_SHAPE_RANGE]; | |||||
auto status = CheckDynamicInputParamValid(dynamic_batch_size, dynamic_image_size, dynamic_dims, input_shape, | auto status = CheckDynamicInputParamValid(dynamic_batch_size, dynamic_image_size, dynamic_dims, input_shape, | ||||
input_format, is_dynamic_input_); | |||||
input_shape_range, input_format, is_dynamic_input_); | |||||
if (status != ge::SUCCESS) { | if (status != ge::SUCCESS) { | ||||
GELOGE(GRAPH_PARAM_INVALID, "Check dynamic input size failed!"); | GELOGE(GRAPH_PARAM_INVALID, "Check dynamic input size failed!"); | ||||
return GRAPH_PARAM_INVALID; | return GRAPH_PARAM_INVALID; | ||||
@@ -70,7 +70,7 @@ const char *const kModeSupport = "only support 0(model to framework model), " | |||||
const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow) 5(Onnx)"; | const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow) 5(Onnx)"; | ||||
const char *const kCaffeFormatSupport = "only support NCHW, ND in Caffe model"; | const char *const kCaffeFormatSupport = "only support NCHW, ND in Caffe model"; | ||||
const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, NDHWC in TF model"; | const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, NDHWC in TF model"; | ||||
const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model"; | |||||
const char *const kONNXFormatSupport = "only support NCHW, ND, NCDHW in ONNX model"; | |||||
// limit available mem size 2G | // limit available mem size 2G | ||||
const long kMinAvailableMem = 2097152; // 2 * 1024 * 1024 | const long kMinAvailableMem = 2097152; // 2 * 1024 * 1024 | ||||
} // namespace | } // namespace | ||||
@@ -84,6 +84,10 @@ DEFINE_string(input_shape, "", | |||||
"Optional; shape of input data. Required when framework is caffe " | "Optional; shape of input data. Required when framework is caffe " | ||||
"or TensorFLow or MindSpore or Onnx. " | "or TensorFLow or MindSpore or Onnx. " | ||||
"Format: \"input_name1:n1,c1,h1,w1;input_name2:n2,c2,h2,w2\""); | "Format: \"input_name1:n1,c1,h1,w1;input_name2:n2,c2,h2,w2\""); | ||||
DEFINE_string(input_shape_range, "", | |||||
"Optional; shape range of input data. Required when framework is caffe " | |||||
"or TensorFLow or Onnx. " | |||||
"Format: \"input_name1:[n1~n2,c1,h1,w1];input_name2:[n2~n3,c2,h2,w2]\""); | |||||
DEFINE_bool(h, false, "show this help message"); | DEFINE_bool(h, false, "show this help message"); | ||||
DEFINE_string(cal_conf, "", "Optional; the calibration config file."); | DEFINE_string(cal_conf, "", "Optional; the calibration config file."); | ||||
@@ -240,6 +244,7 @@ class GFlagUtils { | |||||
" --framework Framework type. 0:Caffe; 1:MindSpore; 3:Tensorflow; 5:Onnx\n" | " --framework Framework type. 0:Caffe; 1:MindSpore; 3:Tensorflow; 5:Onnx\n" | ||||
" --input_format Format of input data. E.g.: \"NCHW\"\n" | " --input_format Format of input data. E.g.: \"NCHW\"\n" | ||||
" --input_shape Shape of input data. Separate multiple nodes with semicolons (;). " | " --input_shape Shape of input data. Separate multiple nodes with semicolons (;). " | ||||
" --input_shape_range Shape range of input data. Separate multiple nodes with semicolons (;)." | |||||
"Use double quotation marks (\") to enclose each argument.\n" | "Use double quotation marks (\") to enclose each argument.\n" | ||||
" E.g.: \"input_name1:n1,c1,h1,w1;input_name2:n2,c2,h2,w2\"\n" | " E.g.: \"input_name1:n1,c1,h1,w1;input_name2:n2,c2,h2,w2\"\n" | ||||
" --dynamic_batch_size Set dynamic batch size. E.g.: \"batchsize1,batchsize2,batchsize3\"\n" | " --dynamic_batch_size Set dynamic batch size. E.g.: \"batchsize1,batchsize2,batchsize3\"\n" | ||||
@@ -373,7 +378,7 @@ class GFlagUtils { | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | ||||
ge::CheckDynamicInputParamValid(FLAGS_dynamic_batch_size, FLAGS_dynamic_image_size, | ge::CheckDynamicInputParamValid(FLAGS_dynamic_batch_size, FLAGS_dynamic_image_size, | ||||
FLAGS_dynamic_dims, FLAGS_input_shape, | |||||
FLAGS_dynamic_dims, FLAGS_input_shape, FLAGS_input_shape_range, | |||||
FLAGS_input_format, is_dynamic_input) != ge::SUCCESS, | FLAGS_input_format, is_dynamic_input) != ge::SUCCESS, | ||||
ret = ge::FAILED, "check dynamic size(batch size, image size or dims) failed!"); | ret = ge::FAILED, "check dynamic size(batch size, image size or dims) failed!"); | ||||
@@ -985,6 +990,7 @@ domi::Status GenerateModel(std::map<string, string> &options, std::string output | |||||
} else { | } else { | ||||
std::map<string, string> atc_params; | std::map<string, string> atc_params; | ||||
atc_params.insert(std::pair<string, string>("input_shape", FLAGS_input_shape)); | atc_params.insert(std::pair<string, string>("input_shape", FLAGS_input_shape)); | ||||
atc_params.insert(std::pair<string, string>(ge::INPUT_SHAPE_RANGE, FLAGS_input_shape_range)); | |||||
atc_params.insert(std::pair<string, string>("out_nodes", FLAGS_out_nodes)); | atc_params.insert(std::pair<string, string>("out_nodes", FLAGS_out_nodes)); | ||||
atc_params.insert(std::pair<string, string>("input_format", FLAGS_input_format)); | atc_params.insert(std::pair<string, string>("input_format", FLAGS_input_format)); | ||||
atc_params.insert(std::pair<string, string>("check_report", FLAGS_check_report)); | atc_params.insert(std::pair<string, string>("check_report", FLAGS_check_report)); | ||||
@@ -576,6 +576,7 @@ Status InitDomiOmgContext(const string &input_shape, const string &input_format, | |||||
GELOGE(PARAM_INVALID, "Failed to parse input shape: %s", input_shape.c_str()); | GELOGE(PARAM_INVALID, "Failed to parse input shape: %s", input_shape.c_str()); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -788,6 +789,12 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<stri | |||||
GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC weights parse ret fail."); | GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC weights parse ret fail."); | ||||
// parser input shape range and update op shape range | |||||
std::string input_shape_range; | |||||
ParseAtcParms(atc_params, INPUT_SHAPE_RANGE, input_shape_range); | |||||
GE_RETURN_WITH_LOG_IF_ERROR(UpdateDynamicInputShapeRange(compute_graph, input_shape_range), | |||||
"Update input shape range failed"); | |||||
GELOGI("ATC parser success."); | GELOGI("ATC parser success."); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -136,50 +136,49 @@ static const int ACL_ERROR_PROFILING_FAILURE = 500005; | |||||
#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE | #define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE | ||||
typedef enum { | typedef enum { | ||||
ACL_DT_UNDEFINED = -1, | |||||
ACL_FLOAT = 0, | |||||
ACL_FLOAT16 = 1, | |||||
ACL_INT8 = 2, | |||||
ACL_INT32 = 3, | |||||
ACL_UINT8 = 4, | |||||
ACL_INT16 = 6, | |||||
ACL_UINT16 = 7, | |||||
ACL_UINT32 = 8, | |||||
ACL_INT64 = 9, | |||||
ACL_UINT64 = 10, | |||||
ACL_DOUBLE = 11, | |||||
ACL_BOOL = 12, | |||||
ACL_STRING = 13, | |||||
ACL_DT_UNDEFINED = -1, | |||||
ACL_FLOAT = 0, | |||||
ACL_FLOAT16 = 1, | |||||
ACL_INT8 = 2, | |||||
ACL_INT32 = 3, | |||||
ACL_UINT8 = 4, | |||||
ACL_INT16 = 6, | |||||
ACL_UINT16 = 7, | |||||
ACL_UINT32 = 8, | |||||
ACL_INT64 = 9, | |||||
ACL_UINT64 = 10, | |||||
ACL_DOUBLE = 11, | |||||
ACL_BOOL = 12, | |||||
ACL_STRING = 13, | |||||
} aclDataType; | } aclDataType; | ||||
typedef enum { | typedef enum { | ||||
ACL_FORMAT_UNDEFINED = -1, | |||||
ACL_FORMAT_NCHW = 0, | |||||
ACL_FORMAT_NHWC = 1, | |||||
ACL_FORMAT_ND = 2, | |||||
ACL_FORMAT_NC1HWC0 = 3, | |||||
ACL_FORMAT_FRACTAL_Z = 4, | |||||
ACL_FORMAT_NC1HWC0_C04 = 12, | |||||
ACL_FORMAT_NDHWC = 27, | |||||
ACL_FORMAT_FRACTAL_NZ = 29, | |||||
ACL_FORMAT_NCDHW = 30, | |||||
ACL_FORMAT_NDC1HWC0 = 32, | |||||
ACL_FRACTAL_Z_3D = 33 | |||||
ACL_FORMAT_UNDEFINED = -1, | |||||
ACL_FORMAT_NCHW = 0, | |||||
ACL_FORMAT_NHWC = 1, | |||||
ACL_FORMAT_ND = 2, | |||||
ACL_FORMAT_NC1HWC0 = 3, | |||||
ACL_FORMAT_FRACTAL_Z = 4, | |||||
ACL_FORMAT_NC1HWC0_C04 = 12, | |||||
ACL_FORMAT_NDHWC = 27, | |||||
ACL_FORMAT_FRACTAL_NZ = 29, | |||||
ACL_FORMAT_NCDHW = 30, | |||||
ACL_FORMAT_NDC1HWC0 = 32, | |||||
ACL_FRACTAL_Z_3D = 33 | |||||
} aclFormat; | } aclFormat; | ||||
typedef enum { | typedef enum { | ||||
ACL_DEBUG = 0, | |||||
ACL_INFO = 1, | |||||
ACL_WARNING = 2, | |||||
ACL_ERROR = 3, | |||||
ACL_DEBUG = 0, | |||||
ACL_INFO = 1, | |||||
ACL_WARNING = 2, | |||||
ACL_ERROR = 3, | |||||
} aclLogLevel; | } aclLogLevel; | ||||
typedef enum { | typedef enum { | ||||
ACL_MEMTYPE_DEVICE = 0, | |||||
ACL_MEMTYPE_HOST = 1, | |||||
ACL_MEMTYPE_DEVICE = 0, | |||||
ACL_MEMTYPE_HOST = 1, | |||||
} aclMemType; | } aclMemType; | ||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
* @brief Converts data of type aclFloat16 to data of type float | * @brief Converts data of type aclFloat16 to data of type float | ||||
@@ -312,9 +311,7 @@ ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType); | |||||
* @retval aclTensorDesc pointer. | * @retval aclTensorDesc pointer. | ||||
* @retval nullptr if param is invalid or run out of memory | * @retval nullptr if param is invalid or run out of memory | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, | |||||
int numDims, | |||||
const int64_t *dims, | |||||
ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, | |||||
aclFormat format); | aclFormat format); | ||||
/** | /** | ||||
@@ -336,8 +333,7 @@ ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc); | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc* desc, | |||||
size_t dimsCount, | |||||
ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount, | |||||
int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]); | int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]); | ||||
/** | /** | ||||
@@ -434,9 +430,7 @@ ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, si | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, | |||||
size_t index, | |||||
size_t dimRangeNum, | |||||
ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, size_t index, size_t dimRangeNum, | |||||
int64_t *dimRange); | int64_t *dimRange); | ||||
/** | /** | ||||
@@ -473,7 +467,7 @@ ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc); | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat, | ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat, | ||||
aclTensorDesc **dstDesc); | |||||
aclTensorDesc **dstDesc); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -561,7 +555,7 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int nu | |||||
* | * | ||||
* @retval null for failed. | * @retval null for failed. | ||||
* @retval OtherValues success. | * @retval OtherValues success. | ||||
*/ | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index); | ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index); | ||||
/** | /** | ||||
@@ -572,7 +566,7 @@ ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, | |||||
* | * | ||||
* @retval null for failed | * @retval null for failed | ||||
* @retval OtherValues success | * @retval OtherValues success | ||||
*/ | |||||
*/ | |||||
ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc); | ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc); | ||||
/** | /** | ||||
@@ -624,13 +618,12 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorPlaceMent(aclTensorDesc *desc, aclMemTy | |||||
* @param ... [IN] the value of current log | * @param ... [IN] the value of current log | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line, | ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line, | ||||
const char *fmt, ...); | |||||
const char *fmt, ...); | |||||
#define ACL_APP_LOG(level, fmt, ...) \ | |||||
aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__) | |||||
#define ACL_APP_LOG(level, fmt, ...) aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__) | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } | ||||
#endif | #endif | ||||
#endif // INC_EXTERNAL_ACL_ACL_BASE_H_ | |||||
#endif // INC_EXTERNAL_ACL_ACL_BASE_H_ |
@@ -27,19 +27,19 @@ | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
#define ACL_MAX_DIM_CNT 128 | |||||
#define ACL_MAX_TENSOR_NAME_LEN 128 | |||||
#define ACL_MAX_BATCH_NUM 128 | |||||
#define ACL_MAX_HW_NUM 128 | |||||
#define ACL_MAX_SHAPE_COUNT 128 | |||||
#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF | |||||
#define ACL_MDL_LOAD_FROM_FILE 1 | |||||
#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2 | |||||
#define ACL_MDL_LOAD_FROM_MEM 3 | |||||
#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4 | |||||
#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5 | |||||
#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6 | |||||
#define ACL_MAX_DIM_CNT 128 | |||||
#define ACL_MAX_TENSOR_NAME_LEN 128 | |||||
#define ACL_MAX_BATCH_NUM 128 | |||||
#define ACL_MAX_HW_NUM 128 | |||||
#define ACL_MAX_SHAPE_COUNT 128 | |||||
#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF | |||||
#define ACL_MDL_LOAD_FROM_FILE 1 | |||||
#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2 | |||||
#define ACL_MDL_LOAD_FROM_MEM 3 | |||||
#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4 | |||||
#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5 | |||||
#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6 | |||||
#define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data" | #define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data" | ||||
#define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data" | #define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data" | ||||
@@ -51,123 +51,123 @@ typedef struct aclAippExtendInfo aclAippExtendInfo; | |||||
typedef struct aclmdlConfigHandle aclmdlConfigHandle; | typedef struct aclmdlConfigHandle aclmdlConfigHandle; | ||||
typedef enum { | typedef enum { | ||||
ACL_YUV420SP_U8 = 1, | |||||
ACL_XRGB8888_U8, | |||||
ACL_RGB888_U8, | |||||
ACL_YUV400_U8, | |||||
ACL_NC1HWC0DI_FP16, | |||||
ACL_NC1HWC0DI_S8, | |||||
ACL_ARGB8888_U8, | |||||
ACL_YUYV_U8, | |||||
ACL_YUV422SP_U8, | |||||
ACL_AYUV444_U8, | |||||
ACL_RAW10, | |||||
ACL_RAW12, | |||||
ACL_RAW16, | |||||
ACL_RAW24, | |||||
ACL_AIPP_RESERVED = 0xffff, | |||||
ACL_YUV420SP_U8 = 1, | |||||
ACL_XRGB8888_U8, | |||||
ACL_RGB888_U8, | |||||
ACL_YUV400_U8, | |||||
ACL_NC1HWC0DI_FP16, | |||||
ACL_NC1HWC0DI_S8, | |||||
ACL_ARGB8888_U8, | |||||
ACL_YUYV_U8, | |||||
ACL_YUV422SP_U8, | |||||
ACL_AYUV444_U8, | |||||
ACL_RAW10, | |||||
ACL_RAW12, | |||||
ACL_RAW16, | |||||
ACL_RAW24, | |||||
ACL_AIPP_RESERVED = 0xffff, | |||||
} aclAippInputFormat; | } aclAippInputFormat; | ||||
typedef enum { | typedef enum { | ||||
ACL_MDL_PRIORITY_INT32 = 0, | |||||
ACL_MDL_LOAD_TYPE_SIZET, | |||||
ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */ | |||||
ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */ | |||||
ACL_MDL_MEM_SIZET, | |||||
ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */ | |||||
ACL_MDL_WEIGHT_SIZET, | |||||
ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to worksapce memory of model with shallow copy */ | |||||
ACL_MDL_WORKSPACE_SIZET, | |||||
ACL_MDL_INPUTQ_NUM_SIZET, | |||||
ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */ | |||||
ACL_MDL_OUTPUTQ_NUM_SIZET, | |||||
ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */ | |||||
ACL_MDL_PRIORITY_INT32 = 0, | |||||
ACL_MDL_LOAD_TYPE_SIZET, | |||||
ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */ | |||||
ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */ | |||||
ACL_MDL_MEM_SIZET, | |||||
ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */ | |||||
ACL_MDL_WEIGHT_SIZET, | |||||
ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to worksapce memory of model with shallow copy */ | |||||
ACL_MDL_WORKSPACE_SIZET, | |||||
ACL_MDL_INPUTQ_NUM_SIZET, | |||||
ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */ | |||||
ACL_MDL_OUTPUTQ_NUM_SIZET, | |||||
ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */ | |||||
} aclmdlConfigAttr; | } aclmdlConfigAttr; | ||||
typedef enum { | typedef enum { | ||||
ACL_DATA_WITHOUT_AIPP = 0, | |||||
ACL_DATA_WITH_STATIC_AIPP, | |||||
ACL_DATA_WITH_DYNAMIC_AIPP, | |||||
ACL_DYNAMIC_AIPP_NODE | |||||
ACL_DATA_WITHOUT_AIPP = 0, | |||||
ACL_DATA_WITH_STATIC_AIPP, | |||||
ACL_DATA_WITH_DYNAMIC_AIPP, | |||||
ACL_DYNAMIC_AIPP_NODE | |||||
} aclmdlInputAippType; | } aclmdlInputAippType; | ||||
typedef struct aclmdlIODims { | typedef struct aclmdlIODims { | ||||
char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */ | |||||
size_t dimCount; /**< dim array count */ | |||||
int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */ | |||||
char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */ | |||||
size_t dimCount; /**< dim array count */ | |||||
int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */ | |||||
} aclmdlIODims; | } aclmdlIODims; | ||||
typedef struct aclAippDims { | typedef struct aclAippDims { | ||||
aclmdlIODims srcDims; /**< input dims before model transform */ | |||||
size_t srcSize; /**< input size before model transform */ | |||||
aclmdlIODims aippOutdims; /**< aipp output dims */ | |||||
size_t aippOutSize; /**< aipp output size */ | |||||
aclmdlIODims srcDims; /**< input dims before model transform */ | |||||
size_t srcSize; /**< input size before model transform */ | |||||
aclmdlIODims aippOutdims; /**< aipp output dims */ | |||||
size_t aippOutSize; /**< aipp output size */ | |||||
} aclAippDims; | } aclAippDims; | ||||
typedef struct aclmdlBatch { | typedef struct aclmdlBatch { | ||||
size_t batchCount; /**< batch array count */ | |||||
uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */ | |||||
size_t batchCount; /**< batch array count */ | |||||
uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */ | |||||
} aclmdlBatch; | } aclmdlBatch; | ||||
typedef struct aclmdlHW { | typedef struct aclmdlHW { | ||||
size_t hwCount; /**< height&width array count */ | |||||
uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */ | |||||
size_t hwCount; /**< height&width array count */ | |||||
uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */ | |||||
} aclmdlHW; | } aclmdlHW; | ||||
typedef struct aclAippInfo { | typedef struct aclAippInfo { | ||||
aclAippInputFormat inputFormat; | |||||
int32_t srcImageSizeW; | |||||
int32_t srcImageSizeH; | |||||
int8_t cropSwitch; | |||||
int32_t loadStartPosW; | |||||
int32_t loadStartPosH; | |||||
int32_t cropSizeW; | |||||
int32_t cropSizeH; | |||||
int8_t resizeSwitch; | |||||
int32_t resizeOutputW; | |||||
int32_t resizeOutputH; | |||||
int8_t paddingSwitch; | |||||
int32_t leftPaddingSize; | |||||
int32_t rightPaddingSize; | |||||
int32_t topPaddingSize; | |||||
int32_t bottomPaddingSize; | |||||
int8_t cscSwitch; | |||||
int8_t rbuvSwapSwitch; | |||||
int8_t axSwapSwitch; | |||||
int8_t singleLineMode; | |||||
int32_t matrixR0C0; | |||||
int32_t matrixR0C1; | |||||
int32_t matrixR0C2; | |||||
int32_t matrixR1C0; | |||||
int32_t matrixR1C1; | |||||
int32_t matrixR1C2; | |||||
int32_t matrixR2C0; | |||||
int32_t matrixR2C1; | |||||
int32_t matrixR2C2; | |||||
int32_t outputBias0; | |||||
int32_t outputBias1; | |||||
int32_t outputBias2; | |||||
int32_t inputBias0; | |||||
int32_t inputBias1; | |||||
int32_t inputBias2; | |||||
int32_t meanChn0; | |||||
int32_t meanChn1; | |||||
int32_t meanChn2; | |||||
int32_t meanChn3; | |||||
float minChn0; | |||||
float minChn1; | |||||
float minChn2; | |||||
float minChn3; | |||||
float varReciChn0; | |||||
float varReciChn1; | |||||
float varReciChn2; | |||||
float varReciChn3; | |||||
aclFormat srcFormat; | |||||
aclDataType srcDatatype; | |||||
size_t srcDimNum; | |||||
size_t shapeCount; | |||||
aclAippDims outDims[ACL_MAX_SHAPE_COUNT]; | |||||
aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */ | |||||
aclAippInputFormat inputFormat; | |||||
int32_t srcImageSizeW; | |||||
int32_t srcImageSizeH; | |||||
int8_t cropSwitch; | |||||
int32_t loadStartPosW; | |||||
int32_t loadStartPosH; | |||||
int32_t cropSizeW; | |||||
int32_t cropSizeH; | |||||
int8_t resizeSwitch; | |||||
int32_t resizeOutputW; | |||||
int32_t resizeOutputH; | |||||
int8_t paddingSwitch; | |||||
int32_t leftPaddingSize; | |||||
int32_t rightPaddingSize; | |||||
int32_t topPaddingSize; | |||||
int32_t bottomPaddingSize; | |||||
int8_t cscSwitch; | |||||
int8_t rbuvSwapSwitch; | |||||
int8_t axSwapSwitch; | |||||
int8_t singleLineMode; | |||||
int32_t matrixR0C0; | |||||
int32_t matrixR0C1; | |||||
int32_t matrixR0C2; | |||||
int32_t matrixR1C0; | |||||
int32_t matrixR1C1; | |||||
int32_t matrixR1C2; | |||||
int32_t matrixR2C0; | |||||
int32_t matrixR2C1; | |||||
int32_t matrixR2C2; | |||||
int32_t outputBias0; | |||||
int32_t outputBias1; | |||||
int32_t outputBias2; | |||||
int32_t inputBias0; | |||||
int32_t inputBias1; | |||||
int32_t inputBias2; | |||||
int32_t meanChn0; | |||||
int32_t meanChn1; | |||||
int32_t meanChn2; | |||||
int32_t meanChn3; | |||||
float minChn0; | |||||
float minChn1; | |||||
float minChn2; | |||||
float minChn3; | |||||
float varReciChn0; | |||||
float varReciChn1; | |||||
float varReciChn2; | |||||
float varReciChn3; | |||||
aclFormat srcFormat; | |||||
aclDataType srcDatatype; | |||||
size_t srcDimNum; | |||||
size_t shapeCount; | |||||
aclAippDims outDims[ACL_MAX_SHAPE_COUNT]; | |||||
aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */ | |||||
} aclAippInfo; | } aclAippInfo; | ||||
/** | /** | ||||
@@ -339,8 +339,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, | |||||
uint32_t *modelId); | |||||
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, uint32_t *modelId); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -362,9 +361,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelS | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, | |||||
uint32_t *modelId, void *workPtr, size_t workSize, | |||||
void *weightPtr, size_t weightSize); | |||||
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, uint32_t *modelId, void *workPtr, | |||||
size_t workSize, void *weightPtr, size_t weightSize); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -387,9 +385,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, | |||||
uint32_t *modelId, void *workPtr, size_t workSize, | |||||
void *weightPtr, size_t weightSize); | |||||
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, uint32_t *modelId, | |||||
void *workPtr, size_t workSize, void *weightPtr, | |||||
size_t weightSize); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -424,8 +422,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId, | ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId, | ||||
const uint32_t *inputQ, size_t inputQNum, | |||||
const uint32_t *outputQ, size_t outputQNum); | |||||
const uint32_t *inputQ, size_t inputQNum, const uint32_t *outputQ, | |||||
size_t outputQNum); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -455,8 +453,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset | |||||
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | ||||
* aclmdlLoadFromMemWithMem | * aclmdlLoadFromMemWithMem | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, | |||||
aclmdlDataset *output, aclrtStream stream); | |||||
ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output, | |||||
aclrtStream stream); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -831,11 +829,11 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet, | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
* | * | ||||
* @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, | |||||
int16_t cscMatrixR0C0, int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, | |||||
int16_t cscMatrixR1C0, int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, | |||||
int16_t cscMatrixR2C0, int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0, | |||||
int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0, | |||||
int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0, | |||||
int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, | |||||
uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1, | uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1, | ||||
uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0, | uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0, | ||||
uint8_t cscInputBiasR1, uint8_t cscInputBiasR2); | uint8_t cscInputBiasR1, uint8_t cscInputBiasR2); | ||||
@@ -851,7 +849,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, in | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
* | * | ||||
* @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
*/ | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch); | ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch); | ||||
/** | /** | ||||
@@ -865,7 +863,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSe | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
* | * | ||||
* @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
*/ | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch); | ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch); | ||||
/** | /** | ||||
@@ -880,7 +878,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
* | * | ||||
* @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
*/ | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW, | ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW, | ||||
int32_t srcImageSizeH); | int32_t srcImageSizeH); | ||||
@@ -900,14 +898,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
* | * | ||||
* @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, | |||||
int8_t scfSwitch, | |||||
int32_t scfInputSizeW, | |||||
int32_t scfInputSizeH, | |||||
int32_t scfOutputSizeW, | |||||
int32_t scfOutputSizeH, | |||||
uint64_t batchIndex); | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, int8_t scfSwitch, int32_t scfInputSizeW, | |||||
int32_t scfInputSizeH, int32_t scfOutputSizeW, | |||||
int32_t scfOutputSizeH, uint64_t batchIndex); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -925,13 +919,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
* | * | ||||
* @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, | |||||
int8_t cropSwitch, | |||||
int32_t cropStartPosW, | |||||
int32_t cropStartPosH, | |||||
int32_t cropSizeW, | |||||
int32_t cropSizeH, | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, int8_t cropSwitch, int32_t cropStartPosW, | |||||
int32_t cropStartPosH, int32_t cropSizeW, int32_t cropSizeH, | |||||
uint64_t batchIndex); | uint64_t batchIndex); | ||||
/** | /** | ||||
@@ -950,7 +940,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
* | * | ||||
* @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
*/ | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch, | ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch, | ||||
int32_t paddingSizeTop, int32_t paddingSizeBottom, | int32_t paddingSizeTop, int32_t paddingSizeBottom, | ||||
int32_t paddingSizeLeft, int32_t paddingSizeRight, | int32_t paddingSizeLeft, int32_t paddingSizeRight, | ||||
@@ -971,13 +961,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
* | * | ||||
* @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, | |||||
int16_t dtcPixelMeanChn0, | |||||
int16_t dtcPixelMeanChn1, | |||||
int16_t dtcPixelMeanChn2, | |||||
int16_t dtcPixelMeanChn3, | |||||
uint64_t batchIndex); | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, int16_t dtcPixelMeanChn0, | |||||
int16_t dtcPixelMeanChn1, int16_t dtcPixelMeanChn2, | |||||
int16_t dtcPixelMeanChn3, uint64_t batchIndex); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -994,13 +981,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
* | * | ||||
* @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, | |||||
float dtcPixelMinChn0, | |||||
float dtcPixelMinChn1, | |||||
float dtcPixelMinChn2, | |||||
float dtcPixelMinChn3, | |||||
uint64_t batchIndex); | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, float dtcPixelMinChn0, | |||||
float dtcPixelMinChn1, float dtcPixelMinChn2, | |||||
float dtcPixelMinChn3, uint64_t batchIndex); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -1017,13 +1001,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
* | * | ||||
* @see aclmdlCreateAIPP | * @see aclmdlCreateAIPP | ||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, | |||||
float dtcPixelVarReciChn0, | |||||
float dtcPixelVarReciChn1, | |||||
float dtcPixelVarReciChn2, | |||||
float dtcPixelVarReciChn3, | |||||
uint64_t batchIndex); | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, float dtcPixelVarReciChn0, | |||||
float dtcPixelVarReciChn1, float dtcPixelVarReciChn2, | |||||
float dtcPixelVarReciChn3, uint64_t batchIndex); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -1039,10 +1020,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, | |||||
* | * | ||||
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | ||||
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP | ||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, | |||||
aclmdlDataset *dataset, | |||||
size_t index, | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset *dataset, size_t index, | |||||
const aclmdlAIPP *aippParmsSet); | const aclmdlAIPP *aippParmsSet); | ||||
/** | /** | ||||
@@ -1059,10 +1038,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, | |||||
* | * | ||||
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | ||||
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP | ||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, | |||||
aclmdlDataset *dataset, | |||||
size_t index, | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlDataset *dataset, size_t index, | |||||
const aclmdlAIPP *aippParmsSet); | const aclmdlAIPP *aippParmsSet); | ||||
/** | /** | ||||
@@ -1080,10 +1057,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, | |||||
* | * | ||||
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | ||||
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP | ||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, | |||||
size_t index, | |||||
aclmdlInputAippType *type, | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, aclmdlInputAippType *type, | |||||
size_t *dynamicAttachedDataIndex); | size_t *dynamicAttachedDataIndex); | ||||
/** | /** | ||||
@@ -1100,7 +1075,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, | |||||
* | * | ||||
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | ||||
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | ||||
*/ | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo); | ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo); | ||||
/** | /** | ||||
@@ -1119,10 +1094,11 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t ind | |||||
* | * | ||||
* @retval ACL_SUCCESS The function is successfully executed | * @retval ACL_SUCCESS The function is successfully executed | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, | |||||
uint32_t taskId, char *opName, size_t opNameLen, aclTensorDesc **inputDesc, size_t *numInputs, | |||||
aclTensorDesc **outputDesc, size_t *numOutputs); | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, uint32_t taskId, | |||||
char *opName, size_t opNameLen, aclTensorDesc **inputDesc, | |||||
size_t *numInputs, aclTensorDesc **outputDesc, | |||||
size_t *numOutputs); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -1130,7 +1106,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_ | |||||
* | * | ||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlInitDump(); | ACL_FUNC_VISIBILITY aclError aclmdlInitDump(); | ||||
/** | /** | ||||
@@ -1141,7 +1117,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlInitDump(); | |||||
* | * | ||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath); | ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath); | ||||
/** | /** | ||||
@@ -1150,7 +1126,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath); | |||||
* | * | ||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump(); | ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump(); | ||||
/** | /** | ||||
@@ -1162,7 +1138,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump(); | |||||
* | * | ||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *handle, uint32_t *modelId); | ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *handle, uint32_t *modelId); | ||||
/** | /** | ||||
@@ -1172,7 +1148,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *hand | |||||
* @retval the aclmdlConfigHandle pointer | * @retval the aclmdlConfigHandle pointer | ||||
* | * | ||||
* @see aclmdlDestroyConfigHandle | * @see aclmdlDestroyConfigHandle | ||||
*/ | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclmdlConfigHandle *aclmdlCreateConfigHandle(); | ACL_FUNC_VISIBILITY aclmdlConfigHandle *aclmdlCreateConfigHandle(); | ||||
/** | /** | ||||
@@ -1201,7 +1177,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handl | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr, | ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr, | ||||
const void *attrValue, size_t valueSize); | |||||
const void *attrValue, size_t valueSize); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -1219,4 +1195,4 @@ ACL_FUNC_VISIBILITY const char *aclmdlGetTensorRealName(const aclmdlDesc *modelD | |||||
} | } | ||||
#endif | #endif | ||||
#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_ | |||||
#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_ |
@@ -33,9 +33,9 @@ typedef void (*aclDataDeallocator)(void *data, size_t length); | |||||
static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1; | static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1; | ||||
typedef enum aclEngineType { | typedef enum aclEngineType { | ||||
ACL_ENGINE_SYS, | |||||
ACL_ENGINE_AICORE, | |||||
ACL_ENGINE_VECTOR, | |||||
ACL_ENGINE_SYS, | |||||
ACL_ENGINE_AICORE, | |||||
ACL_ENGINE_VECTOR, | |||||
} aclopEngineType; | } aclopEngineType; | ||||
/** | /** | ||||
@@ -148,7 +148,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *att | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues, | ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues, | ||||
const uint8_t *values); | |||||
const uint8_t *values); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -163,7 +163,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *a | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues, | ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues, | ||||
const int64_t *values); | |||||
const int64_t *values); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -178,7 +178,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *at | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues, | ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues, | ||||
const float *values); | |||||
const float *values); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -193,7 +193,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char * | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues, | ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues, | ||||
const char **values); | |||||
const char **values); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -208,11 +208,8 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, | |||||
const char *attrName, | |||||
int numLists, | |||||
const int *numValues, | |||||
const int64_t *const values[]); | |||||
ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char *attrName, int numLists, | |||||
const int *numValues, const int64_t *const values[]); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -242,15 +239,10 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead") | ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead") | ||||
ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, | |||||
int numInputs, | |||||
const aclTensorDesc *const inputDesc[], | |||||
const aclDataBuffer *const inputs[], | |||||
int numOutputs, | |||||
const aclTensorDesc *const outputDesc[], | |||||
aclDataBuffer *const outputs[], | |||||
const aclopAttr *attr, | |||||
aclrtStream stream); | |||||
ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], | |||||
const aclDataBuffer *const inputs[], int numOutputs, | |||||
const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], | |||||
const aclopAttr *attr, aclrtStream stream); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -280,15 +272,9 @@ ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, | |||||
int numInputs, | |||||
aclTensorDesc *inputDesc[], | |||||
aclDataBuffer *inputs[], | |||||
int numOutputs, | |||||
aclTensorDesc *outputDesc[], | |||||
aclDataBuffer *outputs[], | |||||
aclopAttr *attr, | |||||
aclrtStream stream); | |||||
ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[], | |||||
aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[], | |||||
aclDataBuffer *outputs[], aclopAttr *attr, aclrtStream stream); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -306,12 +292,9 @@ ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, | |||||
int numInputs, | |||||
const aclTensorDesc *const inputDesc[], | |||||
int numOutputs, | |||||
const aclTensorDesc *const outputDesc[], | |||||
const aclopAttr *opAttr, | |||||
ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, int numInputs, | |||||
const aclTensorDesc *const inputDesc[], int numOutputs, | |||||
const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr, | |||||
aclopHandle **handle); | aclopHandle **handle); | ||||
/** | /** | ||||
@@ -343,12 +326,9 @@ ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle); | |||||
* | * | ||||
* @see aclopCreateHandle | aclCreateDataBuffer | * @see aclopCreateHandle | aclCreateDataBuffer | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, | |||||
int numInputs, | |||||
const aclDataBuffer *const inputs[], | |||||
int numOutputs, | |||||
aclDataBuffer *const outputs[], | |||||
aclrtStream stream); | |||||
ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInputs, | |||||
const aclDataBuffer *const inputs[], int numOutputs, | |||||
aclDataBuffer *const outputs[], aclrtStream stream); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -364,11 +344,8 @@ ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, | |||||
const aclDataBuffer *srcBuffer, | |||||
const aclTensorDesc *dstDesc, | |||||
aclDataBuffer *dstBuffer, | |||||
uint8_t truncate, | |||||
ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDataBuffer *srcBuffer, | |||||
const aclTensorDesc *dstDesc, aclDataBuffer *dstBuffer, uint8_t truncate, | |||||
aclrtStream stream); | aclrtStream stream); | ||||
/** | /** | ||||
@@ -383,12 +360,9 @@ ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, | |||||
aclTensorDesc *dstDesc, | |||||
uint8_t truncate, | |||||
ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc, uint8_t truncate, | |||||
aclopHandle **handle); | aclopHandle **handle); | ||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
* @brief create kernel | * @brief create kernel | ||||
@@ -407,15 +381,10 @@ ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, | |||||
* | * | ||||
* @see aclopCompile | * @see aclopCompile | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, | |||||
const char *kernelId, | |||||
const char *kernelName, | |||||
void *binData, | |||||
int binSize, | |||||
aclopEngineType enginetype, | |||||
ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *kernelId, const char *kernelName, | |||||
void *binData, int binSize, aclopEngineType enginetype, | |||||
aclDataDeallocator deallocator); | aclDataDeallocator deallocator); | ||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
* @brief create kernel | * @brief create kernel | ||||
@@ -430,11 +399,8 @@ ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
typedef aclError (*aclopCompileFunc)(int numInputs, | |||||
const aclTensorDesc *const inputDesc[], | |||||
int numOutputs, | |||||
const aclTensorDesc *const outputDesc[], | |||||
const aclopAttr *opAttr, | |||||
typedef aclError (*aclopCompileFunc)(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs, | |||||
const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr, | |||||
aclopKernelDesc *aclopKernelDesc); | aclopKernelDesc *aclopKernelDesc); | ||||
/** | /** | ||||
@@ -475,11 +441,8 @@ ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType); | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, | |||||
const char *kernelId, | |||||
uint32_t blockDim, | |||||
const void *args, | |||||
uint32_t argSize); | |||||
ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, const char *kernelId, uint32_t blockDim, | |||||
const void *args, uint32_t argSize); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -510,12 +473,9 @@ ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kerne | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, | |||||
int numInputs, | |||||
const aclTensorDesc *const inputDesc[], | |||||
int numOutputs, | |||||
const aclTensorDesc *const outputDesc[], | |||||
const aclopAttr *attr); | |||||
ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs, | |||||
const aclTensorDesc *const inputDesc[], int numOutputs, | |||||
const aclTensorDesc *const outputDesc[], const aclopAttr *attr); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -533,17 +493,12 @@ ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, | |||||
int numInputs, | |||||
aclTensorDesc *inputDesc[], | |||||
aclDataBuffer *inputs[], | |||||
int numOutputs, | |||||
aclTensorDesc *outputDesc[], | |||||
ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, int numInputs, aclTensorDesc *inputDesc[], | |||||
aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[], | |||||
aclopAttr *attr); | aclopAttr *attr); | ||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } | ||||
#endif | #endif | ||||
#endif // INC_EXTERNAL_ACL_ACL_OP_H_ | |||||
#endif // INC_EXTERNAL_ACL_ACL_OP_H_ |
@@ -24,21 +24,18 @@ | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
typedef enum aclCompileType { | |||||
ACL_COMPILE_SYS, | |||||
ACL_COMPILE_UNREGISTERED | |||||
} aclopCompileType; | |||||
typedef enum aclCompileType { ACL_COMPILE_SYS, ACL_COMPILE_UNREGISTERED } aclopCompileType; | |||||
typedef enum { | typedef enum { | ||||
ACL_PRECISION_MODE, | |||||
ACL_AICORE_NUM, | |||||
ACL_AUTO_TUNE_MODE, | |||||
ACL_OP_SELECT_IMPL_MODE, | |||||
ACL_OPTYPELIST_FOR_IMPLMODE, | |||||
ACL_OP_DEBUG_LEVEL, | |||||
ACL_DEBUG_DIR, | |||||
ACL_OP_COMPILER_CACHE_MODE, | |||||
ACL_OP_COMPILER_CACHE_DIR | |||||
ACL_PRECISION_MODE, | |||||
ACL_AICORE_NUM, | |||||
ACL_AUTO_TUNE_MODE, | |||||
ACL_OP_SELECT_IMPL_MODE, | |||||
ACL_OPTYPELIST_FOR_IMPLMODE, | |||||
ACL_OP_DEBUG_LEVEL, | |||||
ACL_DEBUG_DIR, | |||||
ACL_OP_COMPILER_CACHE_MODE, | |||||
ACL_OP_COMPILER_CACHE_DIR | |||||
} aclCompileOpt; | } aclCompileOpt; | ||||
/** | /** | ||||
@@ -59,15 +56,10 @@ typedef enum { | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, | |||||
int numInputs, | |||||
const aclTensorDesc *const inputDesc[], | |||||
int numOutputs, | |||||
const aclTensorDesc *const outputDesc[], | |||||
const aclopAttr *attr, | |||||
aclopEngineType engineType, | |||||
aclopCompileType compileFlag, | |||||
const char *opPath); | |||||
ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], | |||||
int numOutputs, const aclTensorDesc *const outputDesc[], | |||||
const aclopAttr *attr, aclopEngineType engineType, | |||||
aclopCompileType compileFlag, const char *opPath); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -90,11 +82,10 @@ ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(const char *opType, | |||||
int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], | |||||
int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], | |||||
const aclopAttr *attr, aclopEngineType engineType, aclopCompileType compileFlag, | |||||
const char *opPath, aclrtStream stream); | |||||
ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute( | |||||
const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], | |||||
int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr, | |||||
aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -112,4 +103,4 @@ ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *val | |||||
} | } | ||||
#endif | #endif | ||||
#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ | |||||
#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ |
@@ -23,24 +23,24 @@ | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
#define ACL_PROF_ACL_API 0x0001 | |||||
#define ACL_PROF_TASK_TIME 0x0002 | |||||
#define ACL_PROF_AICORE_METRICS 0x0004 | |||||
#define ACL_PROF_AICPU 0x0008 | |||||
#define ACL_PROF_ACL_API 0x0001 | |||||
#define ACL_PROF_TASK_TIME 0x0002 | |||||
#define ACL_PROF_AICORE_METRICS 0x0004 | |||||
#define ACL_PROF_AICPU 0x0008 | |||||
/** | /** | ||||
* @deprecated please use aclprofGetOpTypeLen and aclprofGetOpTNameLen instead | * @deprecated please use aclprofGetOpTypeLen and aclprofGetOpTNameLen instead | ||||
*/ | */ | ||||
#define ACL_PROF_MAX_OP_NAME_LEN 257 | |||||
#define ACL_PROF_MAX_OP_TYPE_LEN 65 | |||||
#define ACL_PROF_MAX_OP_NAME_LEN 257 | |||||
#define ACL_PROF_MAX_OP_TYPE_LEN 65 | |||||
typedef enum { | typedef enum { | ||||
ACL_AICORE_ARITHMETIC_UTILIZATION = 0, | |||||
ACL_AICORE_PIPE_UTILIZATION = 1, | |||||
ACL_AICORE_MEMORY_BANDWIDTH = 2, | |||||
ACL_AICORE_L0B_AND_WIDTH = 3, | |||||
ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, | |||||
ACL_AICORE_NONE = 0xFF | |||||
ACL_AICORE_ARITHMETIC_UTILIZATION = 0, | |||||
ACL_AICORE_PIPE_UTILIZATION = 1, | |||||
ACL_AICORE_MEMORY_BANDWIDTH = 2, | |||||
ACL_AICORE_L0B_AND_WIDTH = 3, | |||||
ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, | |||||
ACL_AICORE_NONE = 0xFF | |||||
} aclprofAicoreMetrics; | } aclprofAicoreMetrics; | ||||
typedef struct aclprofConfig aclprofConfig; | typedef struct aclprofConfig aclprofConfig; | ||||
@@ -101,7 +101,8 @@ ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig); | |||||
* @see aclprofDestroyConfig | * @see aclprofDestroyConfig | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, | ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, | ||||
aclprofAicoreMetrics aicoreMetrics, aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); | |||||
aclprofAicoreMetrics aicoreMetrics, | |||||
aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -141,8 +142,7 @@ ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig); | |||||
* | * | ||||
* @see aclprofModelUnSubscribe | * @see aclprofModelUnSubscribe | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, | |||||
const aclprofSubscribeConfig *profSubscribeConfig); | |||||
ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -170,7 +170,7 @@ ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId); | |||||
* @see aclprofDestroySubscribeConfig | * @see aclprofDestroySubscribeConfig | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch, | ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch, | ||||
aclprofAicoreMetrics aicoreMetrics, void *fd); | |||||
aclprofAicoreMetrics aicoreMetrics, void *fd); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -222,7 +222,7 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLe | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclprofGetOpTypeLen(const void *opInfo, size_t opInfoLen, uint32_t index, | ACL_FUNC_VISIBILITY aclError aclprofGetOpTypeLen(const void *opInfo, size_t opInfoLen, uint32_t index, | ||||
size_t *opTypeLen); | |||||
size_t *opTypeLen); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -237,8 +237,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpTypeLen(const void *opInfo, size_t opIn | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, | |||||
char *opType, size_t opTypeLen); | |||||
ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType, | |||||
size_t opTypeLen); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -253,7 +253,7 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoL | |||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclprofGetOpNameLen(const void *opInfo, size_t opInfoLen, uint32_t index, | ACL_FUNC_VISIBILITY aclError aclprofGetOpNameLen(const void *opInfo, size_t opInfoLen, uint32_t index, | ||||
size_t *opNameLen); | |||||
size_t *opNameLen); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -268,8 +268,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpNameLen(const void *opInfo, size_t opIn | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, | |||||
char *opName, size_t opNameLen); | |||||
ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName, | |||||
size_t opNameLen); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -326,4 +326,4 @@ ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLe | |||||
} | } | ||||
#endif | #endif | ||||
#endif // INC_EXTERNAL_ACL_PROF_H_ | |||||
#endif // INC_EXTERNAL_ACL_PROF_H_ |
@@ -28,63 +28,63 @@ extern "C" { | |||||
#define ACL_EVENT_TIME_LINE 0x00000008u | #define ACL_EVENT_TIME_LINE 0x00000008u | ||||
typedef enum aclrtRunMode { | typedef enum aclrtRunMode { | ||||
ACL_DEVICE, | |||||
ACL_HOST, | |||||
ACL_DEVICE, | |||||
ACL_HOST, | |||||
} aclrtRunMode; | } aclrtRunMode; | ||||
typedef enum aclrtTsId { | typedef enum aclrtTsId { | ||||
ACL_TS_ID_AICORE = 0, | |||||
ACL_TS_ID_AIVECTOR = 1, | |||||
ACL_TS_ID_RESERVED = 2, | |||||
ACL_TS_ID_AICORE = 0, | |||||
ACL_TS_ID_AIVECTOR = 1, | |||||
ACL_TS_ID_RESERVED = 2, | |||||
} aclrtTsId; | } aclrtTsId; | ||||
typedef enum aclrtEventStatus { | typedef enum aclrtEventStatus { | ||||
ACL_EVENT_STATUS_COMPLETE = 0, | |||||
ACL_EVENT_STATUS_NOT_READY = 1, | |||||
ACL_EVENT_STATUS_RESERVED = 2, | |||||
ACL_EVENT_STATUS_COMPLETE = 0, | |||||
ACL_EVENT_STATUS_NOT_READY = 1, | |||||
ACL_EVENT_STATUS_RESERVED = 2, | |||||
} aclrtEventStatus; | } aclrtEventStatus; | ||||
typedef enum aclrtCallbackBlockType { | typedef enum aclrtCallbackBlockType { | ||||
ACL_CALLBACK_NO_BLOCK, | |||||
ACL_CALLBACK_BLOCK, | |||||
ACL_CALLBACK_NO_BLOCK, | |||||
ACL_CALLBACK_BLOCK, | |||||
} aclrtCallbackBlockType; | } aclrtCallbackBlockType; | ||||
typedef enum aclrtMemcpyKind { | typedef enum aclrtMemcpyKind { | ||||
ACL_MEMCPY_HOST_TO_HOST, | |||||
ACL_MEMCPY_HOST_TO_DEVICE, | |||||
ACL_MEMCPY_DEVICE_TO_HOST, | |||||
ACL_MEMCPY_DEVICE_TO_DEVICE, | |||||
ACL_MEMCPY_HOST_TO_HOST, | |||||
ACL_MEMCPY_HOST_TO_DEVICE, | |||||
ACL_MEMCPY_DEVICE_TO_HOST, | |||||
ACL_MEMCPY_DEVICE_TO_DEVICE, | |||||
} aclrtMemcpyKind; | } aclrtMemcpyKind; | ||||
typedef enum aclrtMemMallocPolicy { | typedef enum aclrtMemMallocPolicy { | ||||
ACL_MEM_MALLOC_HUGE_FIRST, | |||||
ACL_MEM_MALLOC_HUGE_ONLY, | |||||
ACL_MEM_MALLOC_NORMAL_ONLY, | |||||
ACL_MEM_MALLOC_HUGE_FIRST_P2P, | |||||
ACL_MEM_MALLOC_HUGE_ONLY_P2P, | |||||
ACL_MEM_MALLOC_NORMAL_ONLY_P2P, | |||||
ACL_MEM_MALLOC_HUGE_FIRST, | |||||
ACL_MEM_MALLOC_HUGE_ONLY, | |||||
ACL_MEM_MALLOC_NORMAL_ONLY, | |||||
ACL_MEM_MALLOC_HUGE_FIRST_P2P, | |||||
ACL_MEM_MALLOC_HUGE_ONLY_P2P, | |||||
ACL_MEM_MALLOC_NORMAL_ONLY_P2P, | |||||
} aclrtMemMallocPolicy; | } aclrtMemMallocPolicy; | ||||
typedef enum aclrtMemAttr { | typedef enum aclrtMemAttr { | ||||
ACL_DDR_MEM, | |||||
ACL_HBM_MEM, | |||||
ACL_DDR_MEM_HUGE, | |||||
ACL_DDR_MEM_NORMAL, | |||||
ACL_HBM_MEM_HUGE, | |||||
ACL_HBM_MEM_NORMAL, | |||||
ACL_DDR_MEM_P2P_HUGE, | |||||
ACL_DDR_MEM_P2P_NORMAL, | |||||
ACL_HBM_MEM_P2P_HUGE, | |||||
ACL_HBM_MEM_P2P_NORMAL, | |||||
ACL_DDR_MEM, | |||||
ACL_HBM_MEM, | |||||
ACL_DDR_MEM_HUGE, | |||||
ACL_DDR_MEM_NORMAL, | |||||
ACL_HBM_MEM_HUGE, | |||||
ACL_HBM_MEM_NORMAL, | |||||
ACL_DDR_MEM_P2P_HUGE, | |||||
ACL_DDR_MEM_P2P_NORMAL, | |||||
ACL_HBM_MEM_P2P_HUGE, | |||||
ACL_HBM_MEM_P2P_NORMAL, | |||||
} aclrtMemAttr; | } aclrtMemAttr; | ||||
typedef enum aclrtGroupAttr { | typedef enum aclrtGroupAttr { | ||||
ACL_GROUP_AICORE_INT, | |||||
ACL_GROUP_AIV_INT, | |||||
ACL_GROUP_AIC_INT, | |||||
ACL_GROUP_SDMANUM_INT, | |||||
ACL_GROUP_ASQNUM_INT, | |||||
ACL_GROUP_GROUPID_INT | |||||
ACL_GROUP_AICORE_INT, | |||||
ACL_GROUP_AIV_INT, | |||||
ACL_GROUP_AIC_INT, | |||||
ACL_GROUP_SDMANUM_INT, | |||||
ACL_GROUP_ASQNUM_INT, | |||||
ACL_GROUP_GROUPID_INT | |||||
} aclrtGroupAttr; | } aclrtGroupAttr; | ||||
typedef struct tagRtGroupInfo aclrtGroupInfo; | typedef struct tagRtGroupInfo aclrtGroupInfo; | ||||
@@ -487,7 +487,7 @@ ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stre | |||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream); | ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream); | ||||
/** | |||||
/** | |||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
* @brief Queries an event's status | * @brief Queries an event's status | ||||
* | * | ||||
@@ -549,9 +549,7 @@ ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, | |||||
* | * | ||||
* @see aclrtFree | acldvppMalloc | aclrtMallocCached | * @see aclrtFree | acldvppMalloc | aclrtMallocCached | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, | |||||
size_t size, | |||||
aclrtMemMallocPolicy policy); | |||||
ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -574,9 +572,7 @@ ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, | |||||
* | * | ||||
* @see aclrtFree | aclrtMalloc | * @see aclrtFree | aclrtMalloc | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, | |||||
size_t size, | |||||
aclrtMemMallocPolicy policy); | |||||
ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -667,10 +663,7 @@ ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr); | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, | |||||
size_t destMax, | |||||
const void *src, | |||||
size_t count, | |||||
ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count, | |||||
aclrtMemcpyKind kind); | aclrtMemcpyKind kind); | ||||
/** | /** | ||||
@@ -717,38 +710,31 @@ ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t | |||||
* | * | ||||
* @see aclrtSynchronizeStream | * @see aclrtSynchronizeStream | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, | |||||
size_t destMax, | |||||
const void *src, | |||||
size_t count, | |||||
aclrtMemcpyKind kind, | |||||
aclrtStream stream); | |||||
ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count, | |||||
aclrtMemcpyKind kind, aclrtStream stream); | |||||
/** | /** | ||||
* @ingroup AscendCL | |||||
* @brief Asynchronous initialize memory | |||||
* and set contents of memory to specified value async | |||||
* | |||||
* @par Function | |||||
* @ingroup AscendCL | |||||
* @brief Asynchronous initialize memory | |||||
* and set contents of memory to specified value async | |||||
* | |||||
* @par Function | |||||
* The memory to be initialized is on the Host or device side, | * The memory to be initialized is on the Host or device side, | ||||
* and the system determines whether | * and the system determines whether | ||||
* it is host or device according to the address | * it is host or device according to the address | ||||
* | * | ||||
* @param devPtr [IN] destination address pointer | |||||
* @param maxCount [IN] Max length of destination address memory | |||||
* @param value [IN] set value | |||||
* @param count [IN] the number of byte to set | |||||
* @param stream [IN] asynchronized task stream | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtSynchronizeStream | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, | |||||
size_t maxCount, | |||||
int32_t value, | |||||
size_t count, | |||||
* @param devPtr [IN] destination address pointer | |||||
* @param maxCount [IN] Max length of destination address memory | |||||
* @param value [IN] set value | |||||
* @param count [IN] the number of byte to set | |||||
* @param stream [IN] asynchronized task stream | |||||
* | |||||
* @retval ACL_SUCCESS The function is successfully executed. | |||||
* @retval OtherValues Failure | |||||
* | |||||
* @see aclrtSynchronizeStream | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count, | |||||
aclrtStream stream); | aclrtStream stream); | ||||
/** | /** | ||||
@@ -894,11 +880,8 @@ ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo); | |||||
* | * | ||||
* @see aclrtGetGroupCount | aclrtGetAllGroupInfo | * @see aclrtGetGroupCount | aclrtGetAllGroupInfo | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, | |||||
int32_t groupIndex, | |||||
aclrtGroupAttr attr, | |||||
void *attrValue, | |||||
size_t valueLen, | |||||
ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupIndex, | |||||
aclrtGroupAttr attr, void *attrValue, size_t valueLen, | |||||
size_t *paramRetSize); | size_t *paramRetSize); | ||||
/** | /** | ||||
@@ -961,5 +944,4 @@ ACL_FUNC_VISIBILITY aclError aclrtGetMemInfo(aclrtMemAttr attr, size_t *free, si | |||||
} | } | ||||
#endif | #endif | ||||
#endif // INC_EXTERNAL_ACL_ACL_RT_H_ | |||||
#endif // INC_EXTERNAL_ACL_ACL_RT_H_ |
@@ -24,10 +24,10 @@ extern "C" { | |||||
#endif | #endif | ||||
enum acltdtTensorType { | enum acltdtTensorType { | ||||
ACL_TENSOR_DATA_UNDEFINED = -1, | |||||
ACL_TENSOR_DATA_TENSOR, | |||||
ACL_TENSOR_DATA_END_OF_SEQUENCE, | |||||
ACL_TENSOR_DATA_ABNORMAL | |||||
ACL_TENSOR_DATA_UNDEFINED = -1, | |||||
ACL_TENSOR_DATA_TENSOR, | |||||
ACL_TENSOR_DATA_END_OF_SEQUENCE, | |||||
ACL_TENSOR_DATA_ABNORMAL | |||||
}; | }; | ||||
typedef struct acltdtDataItem acltdtDataItem; | typedef struct acltdtDataItem acltdtDataItem; | ||||
@@ -64,7 +64,7 @@ ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem * | |||||
* | * | ||||
* @retval null for failed | * @retval null for failed | ||||
* @retval OtherValues success | * @retval OtherValues success | ||||
*/ | |||||
*/ | |||||
ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem); | ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem); | ||||
/** | /** | ||||
@@ -75,7 +75,7 @@ ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataIt | |||||
* | * | ||||
* @retval 0 for failed | * @retval 0 for failed | ||||
* @retval OtherValues success | * @retval OtherValues success | ||||
*/ | |||||
*/ | |||||
ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem); | ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem); | ||||
/** | /** | ||||
@@ -86,7 +86,7 @@ ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataI | |||||
* | * | ||||
* @retval 0 for failed | * @retval 0 for failed | ||||
* @retval OtherValues success | * @retval OtherValues success | ||||
*/ | |||||
*/ | |||||
ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem); | ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem); | ||||
/** | /** | ||||
@@ -118,12 +118,8 @@ ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataIte | |||||
* | * | ||||
* @see acltdtDestroyDataItem | * @see acltdtDestroyDataItem | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, | |||||
const int64_t *dims, | |||||
size_t dimNum, | |||||
aclDataType dataType, | |||||
void *data, | |||||
size_t size); | |||||
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum, | |||||
aclDataType dataType, void *data, size_t size); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -254,8 +250,7 @@ ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle); | |||||
* | * | ||||
* @see acltdtReceiveTensor | * @see acltdtReceiveTensor | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, | |||||
const acltdtDataset *dataset, | |||||
ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset, | |||||
int32_t timeout); | int32_t timeout); | ||||
/** | /** | ||||
@@ -271,13 +266,11 @@ ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, | |||||
* | * | ||||
* @see acltdtSendTensor | * @see acltdtSendTensor | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, | |||||
acltdtDataset *dataset, | |||||
ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset, | |||||
int32_t timeout); | int32_t timeout); | ||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } | ||||
#endif | #endif | ||||
#endif //INC_EXTERNAL_ACL_ACL_TDT_H_ | |||||
#endif // INC_EXTERNAL_ACL_ACL_TDT_H_ |
@@ -23,80 +23,80 @@ | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
static const int32_t ACL_RT_SUCCESS = 0; // success | |||||
static const int32_t ACL_RT_SUCCESS = 0; // success | |||||
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||||
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||||
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||||
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||||
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||||
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||||
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||||
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||||
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||||
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||||
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||||
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||||
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||||
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||||
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||||
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||||
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||||
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||||
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||||
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||||
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||||
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||||
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||||
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||||
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||||
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||||
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||||
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||||
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||||
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||||
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||||
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||||
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||||
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||||
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||||
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||||
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||||
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow | |||||
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||||
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||||
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||||
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||||
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||||
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||||
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||||
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||||
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||||
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||||
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow | |||||
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||||
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||||
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||||
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||||
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||||
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||||
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error | |||||
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||||
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||||
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||||
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||||
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||||
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||||
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||||
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||||
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||||
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||||
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||||
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||||
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||||
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||||
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||||
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||||
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||||
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||||
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||||
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||||
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||||
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||||
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||||
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||||
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||||
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||||
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error | |||||
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||||
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||||
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||||
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||||
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||||
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||||
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||||
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||||
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||||
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||||
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||||
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||||
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||||
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||||
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||||
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||||
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||||
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||||
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||||
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||||
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||||
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||||
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||||
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||||
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||||
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||||
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } | ||||
#endif | #endif | ||||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ |
@@ -23,17 +23,9 @@ | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
typedef enum aclTransType { | |||||
ACL_TRANS_N, | |||||
ACL_TRANS_T, | |||||
ACL_TRANS_NZ, | |||||
ACL_TRANS_NZ_T | |||||
} aclTransType; | |||||
typedef enum aclTransType { ACL_TRANS_N, ACL_TRANS_T, ACL_TRANS_NZ, ACL_TRANS_NZ_T } aclTransType; | |||||
typedef enum aclComputeType { | |||||
ACL_COMPUTE_HIGH_PRECISION, | |||||
ACL_COMPUTE_LOW_PRECISION | |||||
} aclComputeType; | |||||
typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECISION } aclComputeType; | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -61,12 +53,11 @@ typedef enum aclComputeType { | |||||
* | * | ||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, | |||||
const void *alpha, const void *a, int lda, aclDataType dataTypeA, | |||||
const void *x, int incx, aclDataType dataTypeX, | |||||
const void *beta, void *y, int incy, aclDataType dataTypeY, | |||||
aclComputeType type, aclrtStream stream); | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, const void *alpha, const void *a, int lda, | |||||
aclDataType dataTypeA, const void *x, int incx, aclDataType dataTypeX, | |||||
const void *beta, void *y, int incy, aclDataType dataTypeY, | |||||
aclComputeType type, aclrtStream stream); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -83,15 +74,10 @@ ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, | |||||
* | * | ||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, | |||||
int m, | |||||
int n, | |||||
aclDataType dataTypeA, | |||||
aclDataType dataTypeX, | |||||
aclDataType dataTypeY, | |||||
aclComputeType type, | |||||
aclopHandle **handle); | |||||
*/ | |||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, int m, int n, aclDataType dataTypeA, | |||||
aclDataType dataTypeX, aclDataType dataTypeY, | |||||
aclComputeType type, aclopHandle **handle); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -115,18 +101,9 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, | |||||
int m, | |||||
int n, | |||||
const aclFloat16 *alpha, | |||||
const aclFloat16 *a, | |||||
int lda, | |||||
const aclFloat16 *x, | |||||
int incx, | |||||
const aclFloat16 *beta, | |||||
aclFloat16 *y, | |||||
int incy, | |||||
aclComputeType type, | |||||
ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, const aclFloat16 *alpha, | |||||
const aclFloat16 *a, int lda, const aclFloat16 *x, int incx, | |||||
const aclFloat16 *beta, aclFloat16 *y, int incy, aclComputeType type, | |||||
aclrtStream stream); | aclrtStream stream); | ||||
/** | /** | ||||
@@ -142,10 +119,7 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, | |||||
int m, | |||||
int n, | |||||
aclComputeType type, | |||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, int m, int n, aclComputeType type, | |||||
aclopHandle **handle); | aclopHandle **handle); | ||||
/** | /** | ||||
@@ -171,19 +145,9 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, | |||||
int m, | |||||
int n, | |||||
const int32_t *alpha, | |||||
const int8_t *a, | |||||
int lda, | |||||
const int8_t *x, | |||||
int incx, | |||||
const int32_t *beta, | |||||
int32_t *y, | |||||
int incy, | |||||
aclComputeType type, | |||||
aclrtStream stream); | |||||
ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, const int32_t *alpha, const int8_t *a, | |||||
int lda, const int8_t *x, int incx, const int32_t *beta, int32_t *y, | |||||
int incy, aclComputeType type, aclrtStream stream); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -198,10 +162,7 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, | |||||
int m, | |||||
int n, | |||||
aclComputeType type, | |||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, int m, int n, aclComputeType type, | |||||
aclopHandle **handle); | aclopHandle **handle); | ||||
/** | /** | ||||
@@ -233,26 +194,11 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, | |||||
aclTransType transB, | |||||
aclTransType transC, | |||||
int m, | |||||
int n, | |||||
int k, | |||||
const void *alpha, | |||||
const void *matrixA, | |||||
int lda, | |||||
aclDataType dataTypeA, | |||||
const void *matrixB, | |||||
int ldb, | |||||
aclDataType dataTypeB, | |||||
const void *beta, | |||||
void *matrixC, | |||||
int ldc, | |||||
aclDataType dataTypeC, | |||||
aclComputeType type, | |||||
aclrtStream stream); | |||||
ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, | |||||
int k, const void *alpha, const void *matrixA, int lda, | |||||
aclDataType dataTypeA, const void *matrixB, int ldb, aclDataType dataTypeB, | |||||
const void *beta, void *matrixC, int ldc, aclDataType dataTypeC, | |||||
aclComputeType type, aclrtStream stream); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -274,18 +220,10 @@ ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, | |||||
aclTransType transB, | |||||
aclTransType transC, | |||||
int m, | |||||
int n, | |||||
int k, | |||||
aclDataType dataTypeA, | |||||
aclDataType dataTypeB, | |||||
aclDataType dataTypeC, | |||||
aclComputeType type, | |||||
aclopHandle **handle); | |||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, | |||||
int m, int n, int k, aclDataType dataTypeA, | |||||
aclDataType dataTypeB, aclDataType dataTypeC, | |||||
aclComputeType type, aclopHandle **handle); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -313,22 +251,10 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, | |||||
aclTransType transB, | |||||
aclTransType transC, | |||||
int m, | |||||
int n, | |||||
int k, | |||||
const aclFloat16 *alpha, | |||||
const aclFloat16 *matrixA, | |||||
int lda, | |||||
const aclFloat16 *matrixB, | |||||
int ldb, | |||||
const aclFloat16 *beta, | |||||
aclFloat16 *matrixC, | |||||
int ldc, | |||||
aclComputeType type, | |||||
aclrtStream stream); | |||||
ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, | |||||
int k, const aclFloat16 *alpha, const aclFloat16 *matrixA, int lda, | |||||
const aclFloat16 *matrixB, int ldb, const aclFloat16 *beta, | |||||
aclFloat16 *matrixC, int ldc, aclComputeType type, aclrtStream stream); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -346,13 +272,8 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, | |||||
aclTransType transB, | |||||
aclTransType transC, | |||||
int m, | |||||
int n, | |||||
int k, | |||||
aclComputeType type, | |||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, aclTransType transB, aclTransType transC, | |||||
int m, int n, int k, aclComputeType type, | |||||
aclopHandle **handle); | aclopHandle **handle); | ||||
/** | /** | ||||
@@ -381,23 +302,10 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, | |||||
aclTransType transB, | |||||
aclTransType transC, | |||||
int m, | |||||
int n, | |||||
int k, | |||||
const int32_t *alpha, | |||||
const int8_t *matrixA, | |||||
int lda, | |||||
const int8_t *matrixB, | |||||
int ldb, | |||||
const int32_t *beta, | |||||
int32_t *matrixC, | |||||
int ldc, | |||||
aclComputeType type, | |||||
aclrtStream stream); | |||||
ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, | |||||
int k, const int32_t *alpha, const int8_t *matrixA, int lda, | |||||
const int8_t *matrixB, int ldb, const int32_t *beta, int32_t *matrixC, | |||||
int ldc, aclComputeType type, aclrtStream stream); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -415,17 +323,12 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, | |||||
* @retval ACL_SUCCESS The function is successfully executed. | * @retval ACL_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, | |||||
aclTransType transB, | |||||
aclTransType transC, | |||||
int m, | |||||
int n, | |||||
int k, | |||||
aclComputeType type, | |||||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, | |||||
int m, int n, int k, aclComputeType type, | |||||
aclopHandle **handle); | aclopHandle **handle); | ||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } | ||||
#endif | #endif | ||||
#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ | |||||
#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ |
@@ -32,8 +32,8 @@ typedef struct aclfvSearchResult aclfvSearchResult; | |||||
// search operation type | // search operation type | ||||
enum aclfvSearchType { | enum aclfvSearchType { | ||||
SEARCH_1_N, // 1:N operation type | |||||
SEARCH_N_M // N:M operation type | |||||
SEARCH_1_N, // 1:N operation type | |||||
SEARCH_N_M // N:M operation type | |||||
}; | }; | ||||
/** | /** | ||||
@@ -104,7 +104,8 @@ ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t | |||||
* @retval OtherValues success. | * @retval OtherValues success. | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset, | ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset, | ||||
uint32_t featureLen, uint32_t featureCount, uint8_t *featureData, uint32_t featureDataLen); | |||||
uint32_t featureLen, uint32_t featureCount, | |||||
uint8_t *featureData, uint32_t featureDataLen); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -233,8 +234,9 @@ ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInp | |||||
* @retval null for failed. OtherValues success | * @retval null for failed. OtherValues success | ||||
*/ | */ | ||||
ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum, | ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum, | ||||
uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, uint32_t *resultOffset, float *resultDistance, | |||||
uint32_t dataLen); | |||||
uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, | |||||
uint32_t *resultOffset, float *resultDistance, | |||||
uint32_t dataLen); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -348,4 +350,4 @@ ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput | |||||
} | } | ||||
#endif | #endif | ||||
#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ | |||||
#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ |
@@ -311,6 +311,9 @@ const std::string OP_BANK_UPDATE_FLAG = "ge.op_bank_update"; | |||||
// 0: data multi; 1: model multi; | // 0: data multi; 1: model multi; | ||||
const std::string HCOM_MULTI_MODE = "ge.hcomMultiMode"; | const std::string HCOM_MULTI_MODE = "ge.hcomMultiMode"; | ||||
// atc and ir option | |||||
const char *const INPUT_SHAPE_RANGE = "input_shape_range"; | |||||
// Graph run mode | // Graph run mode | ||||
enum GraphRunMode { PREDICTION = 0, TRAIN }; | enum GraphRunMode { PREDICTION = 0, TRAIN }; | ||||
@@ -390,6 +393,7 @@ static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); | |||||
#ifdef __GNUC__ | #ifdef __GNUC__ | ||||
const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT, | const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT, | ||||
INPUT_SHAPE, | INPUT_SHAPE, | ||||
INPUT_SHAPE_RANGE, | |||||
OP_NAME_MAP, | OP_NAME_MAP, | ||||
DYNAMIC_BATCH_SIZE, | DYNAMIC_BATCH_SIZE, | ||||
DYNAMIC_IMAGE_SIZE, | DYNAMIC_IMAGE_SIZE, | ||||
@@ -27,7 +27,7 @@ | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
extern "C" { | extern "C" { | ||||
#endif // __cplusplus | |||||
#endif // __cplusplus | |||||
/** | /** | ||||
* @brief Initialize HCCL. | * @brief Initialize HCCL. | ||||
@@ -66,14 +66,15 @@ extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *root | |||||
* @param sendBuf A pointer identifying the input data address of the operator. | * @param sendBuf A pointer identifying the input data address of the operator. | ||||
* @param recvBuf A pointer identifying the output data address of the operator. | * @param recvBuf A pointer identifying the output data address of the operator. | ||||
* @param count An integer(u64) identifying the number of the output data. | * @param count An integer(u64) identifying the number of the output data. | ||||
* @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, float32. | |||||
* @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, | |||||
* float32. | |||||
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | ||||
* @param comm A pointer identifying the communication resource based on. | * @param comm A pointer identifying the communication resource based on. | ||||
* @param stream A pointer identifying the stream information. | * @param stream A pointer identifying the stream information. | ||||
* @return HcclResult | |||||
* @return HcclResult | |||||
*/ | */ | ||||
extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, | |||||
HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||||
extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, | |||||
HcclComm comm, aclrtStream stream); | |||||
/** | /** | ||||
* @brief Broadcast operator. | * @brief Broadcast operator. | ||||
@@ -84,10 +85,10 @@ HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||||
* @param root An integer(u32) identifying the the root rank in the operator. | * @param root An integer(u32) identifying the the root rank in the operator. | ||||
* @param comm A pointer identifying the communication resource based on | * @param comm A pointer identifying the communication resource based on | ||||
* @param stream A pointer identifying the stream information. | * @param stream A pointer identifying the stream information. | ||||
* @return HcclResult | |||||
* @return HcclResult | |||||
*/ | */ | ||||
extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, | |||||
aclrtStream stream); | |||||
extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, | |||||
aclrtStream stream); | |||||
/** | /** | ||||
* @brief ReduceScatter operator. | * @brief ReduceScatter operator. | ||||
@@ -99,10 +100,10 @@ aclrtStream stream); | |||||
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | ||||
* @param comm A pointer identifying the communication resource based on. | * @param comm A pointer identifying the communication resource based on. | ||||
* @param stream A pointer identifying the stream information. | * @param stream A pointer identifying the stream information. | ||||
* @return HcclResult | |||||
* @return HcclResult | |||||
*/ | */ | ||||
extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, | |||||
HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||||
extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, | |||||
HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||||
/** | /** | ||||
* @brief AllGather operator. | * @brief AllGather operator. | ||||
@@ -113,10 +114,10 @@ HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||||
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | ||||
* @param comm A pointer identifying the communication resource based on. | * @param comm A pointer identifying the communication resource based on. | ||||
* @param stream A pointer identifying the stream information. | * @param stream A pointer identifying the stream information. | ||||
* @return HcclResult | |||||
* @return HcclResult | |||||
*/ | */ | ||||
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, | |||||
HcclComm comm, aclrtStream stream); | |||||
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, | |||||
aclrtStream stream); | |||||
/** | /** | ||||
* @brief Destroy HCCL comm | * @brief Destroy HCCL comm | ||||
@@ -129,5 +130,5 @@ extern HcclResult HcclCommDestroy(HcclComm comm); | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } | ||||
#endif // __cplusplus | |||||
#endif // HCCL_H_ | |||||
#endif // __cplusplus | |||||
#endif // HCCL_H_ |
@@ -16,10 +16,10 @@ | |||||
/** | /** | ||||
* @file hccl_types.h | * @file hccl_types.h | ||||
* @brief HCCL data type definition | |||||
* | |||||
* @brief HCCL data type definition | |||||
* | |||||
*/ | */ | ||||
#ifndef HCCL_TYPES_H_ | #ifndef HCCL_TYPES_H_ | ||||
#define HCCL_TYPES_H_ | #define HCCL_TYPES_H_ | ||||
@@ -27,33 +27,33 @@ | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
extern "C" { | extern "C" { | ||||
#endif // __cplusplus | |||||
#endif // __cplusplus | |||||
/** | /** | ||||
* @brief HCCL functions return value definition | * @brief HCCL functions return value definition | ||||
*/ | */ | ||||
typedef enum { | typedef enum { | ||||
HCCL_SUCCESS = 0, /**< success */ | |||||
HCCL_E_PARA = 1, /**< parameter error */ | |||||
HCCL_E_PTR = 2, /**< empty pointer */ | |||||
HCCL_E_MEMORY = 3, /**< memory error */ | |||||
HCCL_E_INTERNAL = 4, /**< internal error */ | |||||
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ | |||||
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ | |||||
HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||||
HCCL_E_SYSCALL = 8, /**< call system interface error */ | |||||
HCCL_E_TIMEOUT = 9, /**< timeout */ | |||||
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ | |||||
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ | |||||
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ | |||||
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ | |||||
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ | |||||
HCCL_E_RUNTIME = 15, /**< call runtime api fail */ | |||||
HCCL_E_DRV = 16, /**< call driver api fail */ | |||||
HCCL_E_PROFILING = 17, /**< call profiling api fail */ | |||||
HCCL_E_CCE = 18, /**< call cce api fail */ | |||||
HCCL_E_NETWORK = 19, /**< call network api fail */ | |||||
HCCL_E_RESERVED /**< reserved */ | |||||
HCCL_SUCCESS = 0, /**< success */ | |||||
HCCL_E_PARA = 1, /**< parameter error */ | |||||
HCCL_E_PTR = 2, /**< empty pointer */ | |||||
HCCL_E_MEMORY = 3, /**< memory error */ | |||||
HCCL_E_INTERNAL = 4, /**< internal error */ | |||||
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ | |||||
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ | |||||
HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||||
HCCL_E_SYSCALL = 8, /**< call system interface error */ | |||||
HCCL_E_TIMEOUT = 9, /**< timeout */ | |||||
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ | |||||
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ | |||||
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ | |||||
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ | |||||
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ | |||||
HCCL_E_RUNTIME = 15, /**< call runtime api fail */ | |||||
HCCL_E_DRV = 16, /**< call driver api fail */ | |||||
HCCL_E_PROFILING = 17, /**< call profiling api fail */ | |||||
HCCL_E_CCE = 18, /**< call cce api fail */ | |||||
HCCL_E_NETWORK = 19, /**< call network api fail */ | |||||
HCCL_E_RESERVED /**< reserved */ | |||||
} HcclResult; | } HcclResult; | ||||
/** | /** | ||||
@@ -65,37 +65,37 @@ typedef void *HcclComm; | |||||
* @brief HCCL Reduction opperation | * @brief HCCL Reduction opperation | ||||
*/ | */ | ||||
typedef enum { | typedef enum { | ||||
HCCL_REDUCE_SUM = 0, /**< sum */ | |||||
HCCL_REDUCE_PROD = 1, /**< prod */ | |||||
HCCL_REDUCE_MAX = 2, /**< max */ | |||||
HCCL_REDUCE_MIN = 3, /**< min */ | |||||
HCCL_REDUCE_RESERVED /**< reserved */ | |||||
HCCL_REDUCE_SUM = 0, /**< sum */ | |||||
HCCL_REDUCE_PROD = 1, /**< prod */ | |||||
HCCL_REDUCE_MAX = 2, /**< max */ | |||||
HCCL_REDUCE_MIN = 3, /**< min */ | |||||
HCCL_REDUCE_RESERVED /**< reserved */ | |||||
} HcclReduceOp; | } HcclReduceOp; | ||||
/** | /** | ||||
* @brief HCCL data type | * @brief HCCL data type | ||||
*/ | */ | ||||
typedef enum { | typedef enum { | ||||
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||||
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||||
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||||
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||||
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||||
HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ | |||||
HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ | |||||
HCCL_DATA_TYPE_RESERVED /**< reserved */ | |||||
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||||
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||||
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||||
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||||
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||||
HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ | |||||
HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ | |||||
HCCL_DATA_TYPE_RESERVED /**< reserved */ | |||||
} HcclDataType; | } HcclDataType; | ||||
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||||
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||||
/** | /** | ||||
* @brief HCCL root info | * @brief HCCL root info | ||||
*/ | */ | ||||
typedef struct HcclRootInfoDef { | typedef struct HcclRootInfoDef { | ||||
char internal[HCCL_ROOT_INFO_BYTES]; | |||||
char internal[HCCL_ROOT_INFO_BYTES]; | |||||
} HcclRootInfo; | } HcclRootInfo; | ||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } | ||||
#endif // __cplusplus | |||||
#endif // HCCL_TYPES_H_ | |||||
#endif // __cplusplus | |||||
#endif // HCCL_TYPES_H_ |
@@ -23,80 +23,80 @@ | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
static const int32_t ACL_RT_SUCCESS = 0; // success | |||||
static const int32_t ACL_RT_SUCCESS = 0; // success | |||||
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||||
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||||
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||||
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||||
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||||
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||||
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||||
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||||
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||||
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||||
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||||
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||||
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||||
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||||
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||||
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||||
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||||
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||||
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||||
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||||
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||||
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||||
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||||
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||||
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||||
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||||
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||||
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||||
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||||
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||||
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||||
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||||
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||||
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||||
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||||
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||||
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||||
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow | |||||
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||||
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||||
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||||
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||||
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||||
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||||
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||||
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||||
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||||
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||||
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow | |||||
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||||
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||||
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||||
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||||
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||||
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||||
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error | |||||
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||||
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||||
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||||
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||||
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||||
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||||
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||||
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||||
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||||
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||||
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||||
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||||
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||||
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||||
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||||
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||||
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||||
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||||
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||||
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||||
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||||
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||||
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||||
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||||
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||||
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||||
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error | |||||
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||||
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||||
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||||
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||||
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||||
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||||
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||||
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||||
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||||
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||||
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||||
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||||
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||||
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||||
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||||
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||||
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||||
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||||
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||||
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||||
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||||
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||||
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||||
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||||
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||||
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||||
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } | ||||
#endif | #endif | ||||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ |
@@ -20,6 +20,7 @@ | |||||
#include <cstdint> | #include <cstdint> | ||||
#include "framework/common/ge_inner_error_codes.h" | #include "framework/common/ge_inner_error_codes.h" | ||||
#include "common/util/error_manager/error_manager.h" | |||||
#include "toolchain/slog.h" | #include "toolchain/slog.h" | ||||
#ifdef __GNUC__ | #ifdef __GNUC__ | ||||
#include <unistd.h> | #include <unistd.h> | ||||
@@ -55,9 +56,10 @@ inline bool IsLogEnable(int module_name, int log_level) { | |||||
return (enable == 1); | return (enable == 1); | ||||
} | } | ||||
#define GELOGE(ERROR_CODE, fmt, ...) \ | |||||
dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ | |||||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) | |||||
#define GELOGE(ERROR_CODE, fmt, ...) \ | |||||
dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %d(%s) %s" fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ | |||||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ | |||||
##__VA_ARGS__) | |||||
#define GELOGW(fmt, ...) \ | #define GELOGW(fmt, ...) \ | ||||
if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) \ | if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) \ | ||||
dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | ||||
@@ -255,10 +255,10 @@ | |||||
exec_expr1; \ | exec_expr1; \ | ||||
} | } | ||||
#define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \ | |||||
{ \ | |||||
GELOGE(_status, "%s", errormsg); \ | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {errormsg}); \ | |||||
#define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \ | |||||
{ \ | |||||
GELOGE(_status, "[Check][InnerData]%s", errormsg); \ | |||||
REPORT_INNER_ERROR("E19999", "%s", errormsg); \ | |||||
} | } | ||||
#define GE_WARNINGLOG_AND_ERRORMSG(errormsg) \ | #define GE_WARNINGLOG_AND_ERRORMSG(errormsg) \ | ||||
@@ -30,12 +30,12 @@ | |||||
#include "framework/common/ge_inner_error_codes.h" | #include "framework/common/ge_inner_error_codes.h" | ||||
#include "mmpa/mmpa_api.h" | #include "mmpa/mmpa_api.h" | ||||
#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ | |||||
do { \ | |||||
if (size <= 0) { \ | |||||
DOMI_LOGE("param[%s] is not a positive number", #size); \ | |||||
return PARAM_INVALID; \ | |||||
} \ | |||||
#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ | |||||
do { \ | |||||
if (size <= 0) { \ | |||||
DOMI_LOGE("param[%s] is not a positive number", #size); \ | |||||
return PARAM_INVALID; \ | |||||
} \ | |||||
} while (0) | } while (0) | ||||
#define CHECK_FALSE_EXEC(expr, exec_expr, ...) \ | #define CHECK_FALSE_EXEC(expr, exec_expr, ...) \ | ||||
@@ -113,84 +113,75 @@ | |||||
} while (0) | } while (0) | ||||
// Check if the parameter is null. If yes, return PARAM_INVALID and record the error | // Check if the parameter is null. If yes, return PARAM_INVALID and record the error | ||||
#define GE_CHECK_NOTNULL(val) \ | |||||
do { \ | |||||
if (val == nullptr) { \ | |||||
DOMI_LOGE("param[%s] must not be null.", #val); \ | |||||
return ge::PARAM_INVALID; \ | |||||
} \ | |||||
#define GE_CHECK_NOTNULL(val) \ | |||||
do { \ | |||||
if (val == nullptr) { \ | |||||
DOMI_LOGE("[Check][Param:%s]null is invalid when %s.", #val, __FUNCTION__); \ | |||||
return ge::PARAM_INVALID; \ | |||||
} \ | |||||
} while (0) | } while (0) | ||||
// Check if the parameter is null. If yes, just return and record the error | // Check if the parameter is null. If yes, just return and record the error | ||||
#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ | |||||
do { \ | |||||
if (val == nullptr) { \ | |||||
DOMI_LOGE("param[%s] must not be null.", #val); \ | |||||
return; \ | |||||
} \ | |||||
#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ | |||||
do { \ | |||||
if (val == nullptr) { \ | |||||
DOMI_LOGE("param[%s] must not be null.", #val); \ | |||||
return; \ | |||||
} \ | |||||
} while (0) | } while (0) | ||||
// Check whether the parameter is null. If so, execute the exec_expr expression and record the error log | // Check whether the parameter is null. If so, execute the exec_expr expression and record the error log | ||||
#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ | |||||
do { \ | |||||
if (val == nullptr) { \ | |||||
DOMI_LOGE("param[%s] must not be null.", #val); \ | |||||
exec_expr; \ | |||||
} \ | |||||
#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ | |||||
do { \ | |||||
if (val == nullptr) { \ | |||||
DOMI_LOGE("param[%s] must not be null.", #val); \ | |||||
exec_expr; \ | |||||
} \ | |||||
} while (0) | } while (0) | ||||
// Check whether the parameter is null. If yes, return directly and record the error log | // Check whether the parameter is null. If yes, return directly and record the error log | ||||
#define GE_RT_VOID_CHECK_NOTNULL(val) \ | |||||
do { \ | |||||
if (val == nullptr) { \ | |||||
DOMI_LOGE("param[%s] must not be null.", #val); \ | |||||
return; \ | |||||
} \ | |||||
#define GE_RT_VOID_CHECK_NOTNULL(val) \ | |||||
do { \ | |||||
if (val == nullptr) { \ | |||||
DOMI_LOGE("param[%s] must not be null.", #val); \ | |||||
return; \ | |||||
} \ | |||||
} while (0) | } while (0) | ||||
// Check if the parameter is null. If yes, return false and record the error log | // Check if the parameter is null. If yes, return false and record the error log | ||||
#define GE_RT_FALSE_CHECK_NOTNULL(val) \ | |||||
do { \ | |||||
if (val == nullptr) { \ | |||||
DOMI_LOGE("param[%s] must not be null.", #val); \ | |||||
return false; \ | |||||
} \ | |||||
#define GE_RT_FALSE_CHECK_NOTNULL(val) \ | |||||
do { \ | |||||
if (val == nullptr) { \ | |||||
DOMI_LOGE("param[%s] must not be null.", #val); \ | |||||
return false; \ | |||||
} \ | |||||
} while (0) | } while (0) | ||||
// Check if the parameter is out of bounds | // Check if the parameter is out of bounds | ||||
#define GE_CHECK_SIZE(size) \ | |||||
do { \ | |||||
if (size == 0) { \ | |||||
DOMI_LOGE("param[%s] is out of range", #size); \ | |||||
return ge::PARAM_INVALID; \ | |||||
} \ | |||||
} while (0) | |||||
// Check if the container is empty | |||||
#define GE_CHECK_VECTOR_NOT_EMPTY(vector) \ | |||||
do { \ | |||||
if (vector.empty()) { \ | |||||
DOMI_LOGE("param[%s] is empty!", #vector); \ | |||||
return ge::FAILED; \ | |||||
} \ | |||||
#define GE_CHECK_SIZE(size) \ | |||||
do { \ | |||||
if (size == 0) { \ | |||||
DOMI_LOGE("param[%s] is out of range", #size); \ | |||||
return ge::PARAM_INVALID; \ | |||||
} \ | |||||
} while (0) | } while (0) | ||||
// Check if the value on the left is greater than or equal to the value on the right | // Check if the value on the left is greater than or equal to the value on the right | ||||
#define GE_CHECK_GE(lhs, rhs) \ | |||||
do { \ | |||||
if (lhs < rhs) { \ | |||||
DOMI_LOGE("param[%s] is less than[%s]", #lhs, #rhs); \ | |||||
return ge::PARAM_INVALID; \ | |||||
} \ | |||||
#define GE_CHECK_GE(lhs, rhs) \ | |||||
do { \ | |||||
if (lhs < rhs) { \ | |||||
DOMI_LOGE("param[%s] is less than[%s]", #lhs, #rhs); \ | |||||
return ge::PARAM_INVALID; \ | |||||
} \ | |||||
} while (0) | } while (0) | ||||
// Check if the value on the left is less than or equal to the value on the right | // Check if the value on the left is less than or equal to the value on the right | ||||
#define GE_CHECK_LE(lhs, rhs) \ | |||||
do { \ | |||||
if (lhs > rhs) { \ | |||||
DOMI_LOGE("param[%s] is greater than[%s]", #lhs, #rhs); \ | |||||
return ge::PARAM_INVALID; \ | |||||
} \ | |||||
#define GE_CHECK_LE(lhs, rhs) \ | |||||
do { \ | |||||
if (lhs > rhs) { \ | |||||
DOMI_LOGE("param[%s] is greater than[%s]", #lhs, #rhs); \ | |||||
return ge::PARAM_INVALID; \ | |||||
} \ | |||||
} while (0) | } while (0) | ||||
#define GE_DELETE_NEW_SINGLE(var) \ | #define GE_DELETE_NEW_SINGLE(var) \ | ||||
@@ -209,6 +200,17 @@ | |||||
} \ | } \ | ||||
} while (0) | } while (0) | ||||
#define GE_FREE_RT_LOG(addr) \ | |||||
do { \ | |||||
if (addr != nullptr) { \ | |||||
rtError_t error = rtFree(addr); \ | |||||
if (error != RT_ERROR_NONE) { \ | |||||
GELOGE(RT_FAILED, "Call rtFree failed, error: %#x", error); \ | |||||
} \ | |||||
addr = nullptr; \ | |||||
} \ | |||||
} while (0) | |||||
/** | /** | ||||
* @ingroup domi_common | * @ingroup domi_common | ||||
* @brief version of om.proto file | * @brief version of om.proto file | ||||
@@ -1 +1 @@ | |||||
Subproject commit 2607691fc5edaad412d21c9f4a3284b02cfc8c5e | |||||
Subproject commit 140538eadb161278f1c733e7850bfaba65cf665e |
@@ -1 +1 @@ | |||||
Subproject commit 6a07f1a8b9b8b4630a5b60d9d8d02ec4a6314d68 | |||||
Subproject commit b203d47837421b2c149f353fc0808f6a29fa584e |
@@ -18,6 +18,8 @@ | |||||
using namespace ErrorMessage; | using namespace ErrorMessage; | ||||
thread_local Context ErrorManager::error_context_ = {0, "", "", ""}; | |||||
ErrorManager &ErrorManager::GetInstance() { | ErrorManager &ErrorManager::GetInstance() { | ||||
static ErrorManager instance; | static ErrorManager instance; | ||||
return instance; | return instance; | ||||
@@ -40,6 +42,10 @@ using namespace ErrorMessage; | |||||
return 0; | return 0; | ||||
} | } | ||||
int ErrorManager::ReportInterErrMessage(std::string error_code, const std::string &error_msg) { | |||||
return 0; | |||||
} | |||||
/// | /// | ||||
/// @brief output error message | /// @brief output error message | ||||
/// @param [in] handle: print handle | /// @param [in] handle: print handle | ||||
@@ -84,7 +90,7 @@ using namespace ErrorMessage; | |||||
void ErrorManager::GenWorkStreamIdBySessionGraph(uint64_t session_id, uint64_t graph_id) {} | void ErrorManager::GenWorkStreamIdBySessionGraph(uint64_t session_id, uint64_t graph_id) {} | ||||
const std::string &ErrorManager::GetLogHeader() { return "[TEST][TEST]"; } | |||||
const std::string &ErrorManager::GetLogHeader() { return error_context_.log_header; } | |||||
struct Context &ErrorManager::GetErrorContext() { | struct Context &ErrorManager::GetErrorContext() { | ||||
struct Context error_context; | struct Context error_context; | ||||
@@ -269,7 +269,7 @@ CHAR *mmDlerror() | |||||
INT32 mmDladdr(VOID *addr, mmDlInfo *info) | INT32 mmDladdr(VOID *addr, mmDlInfo *info) | ||||
{ | { | ||||
return 0; | |||||
return -1; | |||||
} | } | ||||
VOID *mmDlopen(const CHAR *fileName, INT32 mode) | VOID *mmDlopen(const CHAR *fileName, INT32 mode) | ||||
@@ -38,6 +38,7 @@ include_directories(${GE_CODE_DIR}/metadef/inc) | |||||
include_directories(${GE_CODE_DIR}/metadef/inc/graph) | include_directories(${GE_CODE_DIR}/metadef/inc/graph) | ||||
include_directories(${GE_CODE_DIR}/metadef/inc/common) | include_directories(${GE_CODE_DIR}/metadef/inc/common) | ||||
include_directories(${GE_CODE_DIR}/metadef/third_party) | include_directories(${GE_CODE_DIR}/metadef/third_party) | ||||
include_directories(${GE_CODE_DIR}/metadef/third_party/transformer/inc) | |||||
include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc) | include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc) | ||||
include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/ops) | include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/ops) | ||||
include_directories(${CMAKE_BINARY_DIR}) | include_directories(${CMAKE_BINARY_DIR}) | ||||
@@ -98,8 +99,8 @@ set(SRC_FILES | |||||
"${GE_CODE_DIR}/metadef/graph/utils/transformer_utils.cc" | "${GE_CODE_DIR}/metadef/graph/utils/transformer_utils.cc" | ||||
"${GE_CODE_DIR}/metadef/graph/runtime_inference_context.cc" | "${GE_CODE_DIR}/metadef/graph/runtime_inference_context.cc" | ||||
"${GE_CODE_DIR}/metadef/graph/ref_relation.cc" | "${GE_CODE_DIR}/metadef/graph/ref_relation.cc" | ||||
"${GE_CODE_DIR}/metadef/third_party/transformer/src/transfer_shape_according_to_format.cpp" | |||||
"${GE_CODE_DIR}/metadef/third_party/transformer/src/axis_util.cpp" | |||||
"${GE_CODE_DIR}/metadef/third_party/transformer/src/transfer_shape_according_to_format.cc" | |||||
"${GE_CODE_DIR}/metadef/third_party/transformer/src/axis_util.cc" | |||||
) | ) | ||||
#add_executable(ut_libgraph ${UT_FILES} ${SRC_FILES} ${PROTO_SRCS} ${PROTO_HDRS}) | #add_executable(ut_libgraph ${UT_FILES} ${SRC_FILES} ${PROTO_SRCS} ${PROTO_HDRS}) | ||||
@@ -45,6 +45,7 @@ include_directories(${GE_CODE_DIR}/inc) | |||||
include_directories(${GE_CODE_DIR}/metadef/inc) | include_directories(${GE_CODE_DIR}/metadef/inc) | ||||
include_directories(${GE_CODE_DIR}/ge) | include_directories(${GE_CODE_DIR}/ge) | ||||
include_directories(${GE_CODE_DIR}/ge/inc) | include_directories(${GE_CODE_DIR}/ge/inc) | ||||
include_directories(${GE_CODE_DIR}/ge/ir_build) | |||||
include_directories(${GE_CODE_DIR}/metadef) | include_directories(${GE_CODE_DIR}/metadef) | ||||
include_directories(${GE_CODE_DIR}/metadef/graph) | include_directories(${GE_CODE_DIR}/metadef/graph) | ||||
include_directories(${GE_CODE_DIR}/inc/external) | include_directories(${GE_CODE_DIR}/inc/external) | ||||
@@ -54,6 +55,7 @@ include_directories(${GE_CODE_DIR}/metadef/inc/graph) | |||||
include_directories(${GE_CODE_DIR}/inc/framework) | include_directories(${GE_CODE_DIR}/inc/framework) | ||||
include_directories(${GE_CODE_DIR}/metadef/inc/common) | include_directories(${GE_CODE_DIR}/metadef/inc/common) | ||||
include_directories(${GE_CODE_DIR}/metadef/third_party) | include_directories(${GE_CODE_DIR}/metadef/third_party) | ||||
include_directories(${GE_CODE_DIR}/metadef/third_party/transformer/inc) | |||||
include_directories(${GE_CODE_DIR}/parser) | include_directories(${GE_CODE_DIR}/parser) | ||||
include_directories(${GE_CODE_DIR}/parser/parser) | include_directories(${GE_CODE_DIR}/parser/parser) | ||||
include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc) | include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc) | ||||
@@ -61,6 +63,7 @@ include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/cce) | |||||
include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/ops) | include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/ops) | ||||
include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain) | include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain) | ||||
include_directories(${GE_CODE_DIR}/tests/ut/ge) | include_directories(${GE_CODE_DIR}/tests/ut/ge) | ||||
include_directories(${GE_CODE_DIR}/tests/ut/common) | |||||
include_directories(${CMAKE_BINARY_DIR}) | include_directories(${CMAKE_BINARY_DIR}) | ||||
include_directories(${CMAKE_BINARY_DIR}/proto/ge) | include_directories(${CMAKE_BINARY_DIR}/proto/ge) | ||||
include_directories(${CMAKE_BINARY_DIR}/proto/ge/proto) | include_directories(${CMAKE_BINARY_DIR}/proto/ge/proto) | ||||
@@ -85,8 +88,8 @@ set(GRAPH_SRC_FILES | |||||
"${GE_CODE_DIR}/metadef/graph/node.cc" | "${GE_CODE_DIR}/metadef/graph/node.cc" | ||||
"${GE_CODE_DIR}/metadef/graph/runtime_inference_context.cc" | "${GE_CODE_DIR}/metadef/graph/runtime_inference_context.cc" | ||||
"${GE_CODE_DIR}/metadef/graph/op_desc.cc" | "${GE_CODE_DIR}/metadef/graph/op_desc.cc" | ||||
"${GE_CODE_DIR}/metadef/third_party/transformer/src/transfer_shape_according_to_format.cpp" | |||||
"${GE_CODE_DIR}/metadef/third_party/transformer/src/axis_util.cpp" | |||||
"${GE_CODE_DIR}/metadef/third_party/transformer/src/transfer_shape_according_to_format.cc" | |||||
"${GE_CODE_DIR}/metadef/third_party/transformer/src/axis_util.cc" | |||||
"${GE_CODE_DIR}/metadef/graph/operator.cc" | "${GE_CODE_DIR}/metadef/graph/operator.cc" | ||||
"${GE_CODE_DIR}/metadef/graph/operator_factory.cc" | "${GE_CODE_DIR}/metadef/graph/operator_factory.cc" | ||||
"${GE_CODE_DIR}/metadef/graph/operator_factory_impl.cc" | "${GE_CODE_DIR}/metadef/graph/operator_factory_impl.cc" | ||||
@@ -732,6 +735,7 @@ set(KERNEL_TEST_FILES | |||||
set(MULTI_PARTS_TEST_FILES | set(MULTI_PARTS_TEST_FILES | ||||
"graph_ir/ge_operator_factory_unittest.cc" | "graph_ir/ge_operator_factory_unittest.cc" | ||||
"graph_ir/ge_ir_build_unittest.cc" | |||||
"graph/transop_util_unittest.cc" | "graph/transop_util_unittest.cc" | ||||
"common/datatype_transfer_unittest.cc" | "common/datatype_transfer_unittest.cc" | ||||
"common/dump_manager_unittest.cc" | "common/dump_manager_unittest.cc" | ||||
@@ -9136,23 +9136,23 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type2) { | |||||
EXPECT_EQ(transfer.TransFormat(args, result), ACL_ERROR_GE_DATATYPE_INVALID); | EXPECT_EQ(transfer.TransFormat(args, result), ACL_ERROR_GE_DATATYPE_INVALID); | ||||
} | } | ||||
TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type3) { | |||||
uint16_t data[1 * 1 * 1 * 16 * 16] = {0}; | |||||
TransArgs args{reinterpret_cast<uint8_t *>(data), | |||||
FORMAT_FRACTAL_NZ, | |||||
FORMAT_NHWC, | |||||
{1, 1, 1, 16, 16}, | |||||
{ | |||||
1, | |||||
1, | |||||
4, | |||||
4, | |||||
}, | |||||
DT_VARIANT}; | |||||
TransResult result; | |||||
FormatTransferFractalNzND transfer; | |||||
EXPECT_EQ(transfer.TransFormat(args, result), ACL_ERROR_GE_DATATYPE_INVALID); | |||||
} | |||||
// TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type3) { | |||||
// uint16_t data[1 * 1 * 1 * 16 * 16] = {0}; | |||||
// TransArgs args{reinterpret_cast<uint8_t *>(data), | |||||
// FORMAT_FRACTAL_NZ, | |||||
// FORMAT_NHWC, | |||||
// {1, 1, 1, 16, 16}, | |||||
// { | |||||
// 1, | |||||
// 1, | |||||
// 4, | |||||
// 4, | |||||
// }, | |||||
// DT_VARIANT}; | |||||
// TransResult result; | |||||
// FormatTransferFractalNzND transfer; | |||||
// EXPECT_EQ(transfer.TransFormat(args, result), ACL_ERROR_GE_DATATYPE_INVALID); | |||||
// } | |||||
TEST_F(UtestFormatTransferNdFractNz, invalid_dst_format2) { | TEST_F(UtestFormatTransferNdFractNz, invalid_dst_format2) { | ||||
uint16_t data[1 * 1 * 1 * 1 * 16 * 16] = {0}; | uint16_t data[1 * 1 * 1 * 1 * 16 * 16] = {0}; | ||||
@@ -5354,14 +5354,14 @@ TEST_F(UtestFormatTransferNhwcFz, build_transfer_uint8) { | |||||
EXPECT_NE(transfer, nullptr); | EXPECT_NE(transfer, nullptr); | ||||
} | } | ||||
TEST_F(UtestFormatTransferNhwcFz, invalid_data_type) { | |||||
uint16_t data[1 * 4 * 4 * 1] = {0}; | |||||
TransArgs args{ | |||||
reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_NZ, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_VARIANT}; | |||||
FormatTransferFractalZ transfer; | |||||
EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | |||||
ACL_ERROR_GE_DATATYPE_INVALID); | |||||
} | |||||
// TEST_F(UtestFormatTransferNhwcFz, invalid_data_type) { | |||||
// uint16_t data[1 * 4 * 4 * 1] = {0}; | |||||
// TransArgs args{ | |||||
// reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_NZ, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_VARIANT}; | |||||
// FormatTransferFractalZ transfer; | |||||
// EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | |||||
// ACL_ERROR_GE_DATATYPE_INVALID); | |||||
// } | |||||
TEST_F(UtestFormatTransferNhwcFz, invalid_data_format) { | TEST_F(UtestFormatTransferNhwcFz, invalid_data_format) { | ||||
uint16_t data[1 * 4 * 4 * 1] = {0}; | uint16_t data[1 * 4 * 4 * 1] = {0}; | ||||
@@ -52,34 +52,34 @@ TEST_F(UtestFormatTransfer, build_unsupported_transfer) { | |||||
EXPECT_EQ(transfer2, nullptr); | EXPECT_EQ(transfer2, nullptr); | ||||
} | } | ||||
TEST_F(UtestFormatTransfer, get_size_by_data_type) { | |||||
EXPECT_EQ(GetSizeByDataType(DT_FLOAT), 4); | |||||
EXPECT_EQ(GetSizeByDataType(DT_FLOAT16), 2); | |||||
EXPECT_EQ(GetSizeByDataType(DT_INT8), 1); | |||||
EXPECT_EQ(GetSizeByDataType(DT_INT16), 2); | |||||
EXPECT_EQ(GetSizeByDataType(DT_UINT16), 2); | |||||
EXPECT_EQ(GetSizeByDataType(DT_UINT8), 1); | |||||
EXPECT_EQ(GetSizeByDataType(DT_INT32), 4); | |||||
EXPECT_EQ(GetSizeByDataType(DT_INT64), 8); | |||||
EXPECT_EQ(GetSizeByDataType(DT_UINT32), 4); | |||||
EXPECT_EQ(GetSizeByDataType(DT_UINT64), 8); | |||||
EXPECT_EQ(GetSizeByDataType(DT_BOOL), 1); | |||||
EXPECT_EQ(GetSizeByDataType(DT_DOUBLE), 8); | |||||
EXPECT_EQ(GetSizeByDataType(DT_STRING), -1); | |||||
EXPECT_EQ(GetSizeByDataType(DT_DUAL_SUB_INT8), 1); | |||||
EXPECT_EQ(GetSizeByDataType(DT_DUAL_SUB_UINT8), 1); | |||||
EXPECT_EQ(GetSizeByDataType(DT_COMPLEX64), 8); | |||||
EXPECT_EQ(GetSizeByDataType(DT_COMPLEX128), 16); | |||||
EXPECT_EQ(GetSizeByDataType(DT_QINT8), 1); | |||||
EXPECT_EQ(GetSizeByDataType(DT_QINT16), 2); | |||||
EXPECT_EQ(GetSizeByDataType(DT_QINT32), 4); | |||||
EXPECT_EQ(GetSizeByDataType(DT_QUINT8), 1); | |||||
EXPECT_EQ(GetSizeByDataType(DT_QUINT16), 2); | |||||
EXPECT_EQ(GetSizeByDataType(DT_RESOURCE), -1); | |||||
EXPECT_EQ(GetSizeByDataType(DT_STRING_REF), -1); | |||||
EXPECT_EQ(GetSizeByDataType(DT_DUAL), 5); | |||||
EXPECT_EQ(GetSizeByDataType(DT_UNDEFINED), -1); | |||||
EXPECT_EQ(DT_UNDEFINED, 27); | |||||
} | |||||
// TEST_F(UtestFormatTransfer, get_size_by_data_type) { | |||||
// EXPECT_EQ(GetSizeByDataType(DT_FLOAT), 4); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_FLOAT16), 2); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_INT8), 1); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_INT16), 2); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_UINT16), 2); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_UINT8), 1); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_INT32), 4); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_INT64), 8); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_UINT32), 4); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_UINT64), 8); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_BOOL), 1); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_DOUBLE), 8); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_STRING), -1); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_DUAL_SUB_INT8), 1); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_DUAL_SUB_UINT8), 1); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_COMPLEX64), 8); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_COMPLEX128), 16); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_QINT8), 1); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_QINT16), 2); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_QINT32), 4); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_QUINT8), 1); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_QUINT16), 2); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_RESOURCE), -1); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_STRING_REF), -1); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_DUAL), 5); | |||||
// EXPECT_EQ(GetSizeByDataType(DT_UNDEFINED), -1); | |||||
// EXPECT_EQ(DT_UNDEFINED, 27); | |||||
// } | |||||
} // namespace formats | } // namespace formats | ||||
} // namespace ge | } // namespace ge |
@@ -31,7 +31,7 @@ TEST_F(UTEST_opdebug_register, register_debug_for_model_success) { | |||||
OpdebugRegister opdebug_register; | OpdebugRegister opdebug_register; | ||||
rtModel_t model_handle = (void*)0x111; | rtModel_t model_handle = (void*)0x111; | ||||
uint32_t op_debug_mode = 1; | uint32_t op_debug_mode = 1; | ||||
DataDumper data_dumper; | |||||
DataDumper data_dumper({}); | |||||
auto ret = opdebug_register.RegisterDebugForModel(model_handle, op_debug_mode, data_dumper); | auto ret = opdebug_register.RegisterDebugForModel(model_handle, op_debug_mode, data_dumper); | ||||
opdebug_register.UnregisterDebugForModel(model_handle); | opdebug_register.UnregisterDebugForModel(model_handle); | ||||
EXPECT_EQ(ret, ge::SUCCESS); | EXPECT_EQ(ret, ge::SUCCESS); | ||||
@@ -41,7 +41,7 @@ TEST_F(UTEST_opdebug_register, register_debug_for_stream_success) { | |||||
OpdebugRegister opdebug_register; | OpdebugRegister opdebug_register; | ||||
rtStream_t stream = (void*)0x111; | rtStream_t stream = (void*)0x111; | ||||
uint32_t op_debug_mode = 1; | uint32_t op_debug_mode = 1; | ||||
DataDumper data_dumper; | |||||
DataDumper data_dumper({}); | |||||
auto ret = opdebug_register.RegisterDebugForStream(stream, op_debug_mode, data_dumper); | auto ret = opdebug_register.RegisterDebugForStream(stream, op_debug_mode, data_dumper); | ||||
opdebug_register.UnregisterDebugForStream(stream); | opdebug_register.UnregisterDebugForStream(stream); | ||||
EXPECT_EQ(ret, ge::SUCCESS); | EXPECT_EQ(ret, ge::SUCCESS); | ||||
@@ -20,6 +20,11 @@ | |||||
#define protected public | #define protected public | ||||
#include "generator/ge_generator.h" | #include "generator/ge_generator.h" | ||||
#include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
#include "graph/attr_value.h" | |||||
#include "graph/debug/ge_attr_define.h" | |||||
#include "graph/utils/graph_utils.h" | |||||
#include "../graph/passes/graph_builder_utils.h" | |||||
#include "../graph/manager/graph_manager.h" | |||||
using namespace std; | using namespace std; | ||||
@@ -31,6 +36,16 @@ class UtestGeGenerator : public testing::Test { | |||||
void TearDown() {} | void TearDown() {} | ||||
}; | }; | ||||
namespace { | |||||
ComputeGraphPtr MakeGraph() { | |||||
ge::ut::GraphBuilder builder("graph"); | |||||
auto data = builder.AddNode("data", "Data", 1, 1); | |||||
auto addn1 = builder.AddNode("addn1", "AddN", 1, 1); | |||||
builder.AddDataEdge(data, 0, addn1, 0); | |||||
return builder.GetGraph(); | |||||
} | |||||
} // namespace | |||||
/* | /* | ||||
TEST_F(UtestGeGenerator, test_build_single_op_offline) { | TEST_F(UtestGeGenerator, test_build_single_op_offline) { | ||||
GeTensorDesc tensor_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); | GeTensorDesc tensor_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); | ||||
@@ -71,4 +86,28 @@ TEST_F(UtestGeGenerator, test_build_single_op_online) { | |||||
ModelBufferData model_buffer; | ModelBufferData model_buffer; | ||||
EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, model_buffer), FAILED); | EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, model_buffer), FAILED); | ||||
} | } | ||||
TEST_F(UtestGeGenerator, test_graph_manager) { | |||||
GraphManager graph_manager; | |||||
GraphPartitioner graph_partitioner; | |||||
auto root_graph = MakeGraph(); | |||||
auto sub_graph = MakeGraph(); | |||||
root_graph->AddSubGraph(sub_graph); | |||||
auto sgi = MakeShared<SubGraphInfo>(); | |||||
// set engine name | |||||
sgi->SetEngineName("AIcoreEngine"); | |||||
sgi->SetSubGraph(sub_graph); | |||||
auto sgi_gelocal = MakeShared<SubGraphInfo>(); | |||||
// set engine name | |||||
sgi_gelocal->SetEngineName("GELOCAL"); | |||||
sgi_gelocal->SetSubGraph(sub_graph); | |||||
graph_partitioner.graph_2_input_subgraph_[root_graph] = sgi_gelocal; | |||||
graph_partitioner.graph_2_subgraph_list_.insert({root_graph, {sgi, sgi_gelocal}}); | |||||
graph_partitioner.graph_2_subgraph_list_.insert({sub_graph, {sgi, sgi_gelocal}}); | |||||
EXPECT_EQ(graph_manager.ConvertGraphToFile(root_graph, graph_partitioner, "./"), GRAPH_SUCCESS); | |||||
} | |||||
} // namespace ge | } // namespace ge |
@@ -56,7 +56,7 @@ TEST_F(UtestDataDumper, LoadDumpInfo_no_output_addrs_fail) { | |||||
TEST_F(UtestDataDumper, UnloadDumpInfo_success) { | TEST_F(UtestDataDumper, UnloadDumpInfo_success) { | ||||
RuntimeParam rts_param; | RuntimeParam rts_param; | ||||
DataDumper data_dumper(rts_param); | |||||
DataDumper data_dumper(&rts_param); | |||||
data_dumper.SetModelName("test"); | data_dumper.SetModelName("test"); | ||||
data_dumper.SetModelId(2333); | data_dumper.SetModelId(2333); | ||||
@@ -74,4 +74,18 @@ TEST_F(UtestGraphPreproces, test_dynamic_input_shape_parse) { | |||||
EXPECT_EQ(result_shape.GetDim(i), expect_shape.at(i)); | EXPECT_EQ(result_shape.GetDim(i), expect_shape.at(i)); | ||||
} | } | ||||
} | } | ||||
TEST_F(UtestGraphPreproces, test_check_user_input) { | |||||
ge::GraphPrepare graph_prepare; | |||||
graph_prepare.compute_graph_ = BuildGraph1(); | |||||
vector<int64_t> dim = {2, -3}; | |||||
GeTensor tensor; | |||||
tensor.SetTensorDesc(GeTensorDesc(GeShape(dim))); | |||||
std::vector<GeTensor> user_input; | |||||
user_input.emplace_back(tensor); | |||||
Status ret = graph_prepare.CheckUserInput(user_input); | |||||
EXPECT_EQ(ret, GE_GRAPH_INIT_FAILED); | |||||
} | |||||
} | } |
@@ -0,0 +1,100 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include <gtest/gtest.h> | |||||
#include "ir_build/atc_ir_common.h" | |||||
#include "graph/testcase/ge_graph/graph_builder_utils.h" | |||||
#define protected public | |||||
#define private public | |||||
#undef private | |||||
#undef protected | |||||
const string DATA = "Data"; | |||||
const string AddNYes = "AddNYes"; | |||||
const string NETOUTPUT = "NetOutput"; | |||||
using namespace ge; | |||||
class UtestIrCommon : public testing::Test { | |||||
protected: | |||||
void SetUp() {} | |||||
void TearDown() {} | |||||
}; | |||||
static ge::OpDescPtr CreateOpDesc(const std::string &name, const std::string &type) { | |||||
OpDescPtr op_desc = std::make_shared<ge::OpDesc>(name, type); | |||||
ge::GeTensorDesc ge_tensor_desc; | |||||
op_desc->AddInputDesc("input", ge_tensor_desc); | |||||
op_desc->AddOutputDesc("output", ge_tensor_desc); | |||||
return op_desc; | |||||
} | |||||
static ComputeGraphPtr BuildComputeGraph() { | |||||
auto builder = ut::GraphBuilder("test"); | |||||
auto data1 = builder.AddNode("input1", DATA, 1, 1, FORMAT_NCHW, DT_FLOAT, {1, 2, 3}); | |||||
auto data2 = builder.AddNode("input2", DATA, 1, 1, FORMAT_NCHW, DT_FLOAT, {4, 10}); | |||||
auto addn1 = builder.AddNode("addn1", AddNYes, 2, 1); | |||||
auto netoutput = builder.AddNode("netoutput", NETOUTPUT, 1, 0); | |||||
builder.AddDataEdge(data1, 0, addn1, 0); | |||||
builder.AddDataEdge(data2, 0, addn1, 1); | |||||
builder.AddDataEdge(addn1, 0,netoutput, 0); | |||||
return builder.GetGraph(); | |||||
} | |||||
TEST(UtestIrCommon, update_data_op_shape) { | |||||
ge::OpDescPtr op_desc = CreateOpDesc("Data", "Data"); | |||||
map<string, vector<int64_t>> shape_map; | |||||
shape_map["Data"] = {{1,2}}; | |||||
Status ret = UpdateDataOpShape(op_desc, shape_map); | |||||
EXPECT_EQ(ret, ge::SUCCESS); | |||||
} | |||||
TEST(UtestIrCommon, update_dynamic_shape_range_success) { | |||||
ComputeGraphPtr graph = BuildComputeGraph(); | |||||
std::string input_shape_range = "input1:[1, 2~3, -1];input2:[3~5, 10]"; | |||||
Status ret = UpdateDynamicInputShapeRange(graph, input_shape_range); | |||||
EXPECT_EQ(ret, ge::SUCCESS); | |||||
} | |||||
TEST(UtestIrCommon, update_dynamic_shape_range_failed) { | |||||
ComputeGraphPtr graph = BuildComputeGraph(); | |||||
// 1 | |||||
std::string input_shape_range = "input1;[1, 2~3, -1]"; | |||||
Status ret = UpdateDynamicInputShapeRange(graph, input_shape_range); | |||||
EXPECT_EQ(ret, ge::PARAM_INVALID); | |||||
// 2 | |||||
input_shape_range = "input1:[1, 2~3, -1)"; | |||||
ret = UpdateDynamicInputShapeRange(graph, input_shape_range); | |||||
EXPECT_EQ(ret, ge::PARAM_INVALID); | |||||
//3 | |||||
input_shape_range = "input1:[1, 3~2, -1];input2:[3~5, 10]"; | |||||
ret = UpdateDynamicInputShapeRange(graph, input_shape_range); | |||||
EXPECT_EQ(ret, ge::FAILED); | |||||
//4 | |||||
input_shape_range = "input1:[1, 2~-3, -1]"; | |||||
ret = UpdateDynamicInputShapeRange(graph, input_shape_range); | |||||
EXPECT_EQ(ret, ge::PARAM_INVALID); | |||||
} |
@@ -15,8 +15,8 @@ | |||||
*/ | */ | ||||
#include <gtest/gtest.h> | #include <gtest/gtest.h> | ||||
#include <gmock/gmock.h> | |||||
#include <vector> | #include <vector> | ||||
#include "runtime/rt.h" | #include "runtime/rt.h" | ||||
#define protected public | #define protected public | ||||
@@ -25,7 +25,6 @@ | |||||
#include "hybrid/model/hybrid_model.h" | #include "hybrid/model/hybrid_model.h" | ||||
#include "model/ge_model.h" | #include "model/ge_model.h" | ||||
#include "model/ge_root_model.h" | #include "model/ge_root_model.h" | ||||
#include "hybrid/node_executor/aicore/aicore_op_task.h" | #include "hybrid/node_executor/aicore/aicore_op_task.h" | ||||
#include "framework/common/taskdown_common.h" | #include "framework/common/taskdown_common.h" | ||||
#include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
@@ -33,7 +32,10 @@ | |||||
#include "hybrid/executor/hybrid_execution_context.h" | #include "hybrid/executor/hybrid_execution_context.h" | ||||
#include "hybrid/node_executor/aicore/aicore_task_builder.h" | #include "hybrid/node_executor/aicore/aicore_task_builder.h" | ||||
#include "graph/load/model_manager/tbe_handle_store.h" | #include "graph/load/model_manager/tbe_handle_store.h" | ||||
#include "graph/manager/graph_mem_allocator.h" | |||||
#include "hybrid/common/npu_memory_allocator.h" | |||||
#include "graph/types.h" | #include "graph/types.h" | ||||
#include "graph/utils/tensor_utils.h" | |||||
#undef private | #undef private | ||||
#undef protected | #undef protected | ||||
@@ -43,6 +45,7 @@ using namespace testing; | |||||
using namespace ge; | using namespace ge; | ||||
using namespace hybrid; | using namespace hybrid; | ||||
class UtestGeHybrid : public testing::Test { | class UtestGeHybrid : public testing::Test { | ||||
protected: | protected: | ||||
void SetUp() {} | void SetUp() {} | ||||
@@ -152,6 +155,20 @@ TEST_F(UtestGeHybrid, index_taskdefs_failed) { | |||||
ASSERT_EQ(hybrid_model_builder.IndexTaskDefs(graph, ge_model), INTERNAL_ERROR); | ASSERT_EQ(hybrid_model_builder.IndexTaskDefs(graph, ge_model), INTERNAL_ERROR); | ||||
} | } | ||||
TEST_F(UtestGeHybrid, parse_force_infershape_nodes) { | |||||
const char *const kForceInfershape = "_force_infershape_when_running"; | |||||
auto graph = make_shared<ComputeGraph>("graph"); | |||||
OpDescPtr op_desc = CreateOpDesc("Conv2D", "Conv2D"); | |||||
ge::AttrUtils::SetBool(op_desc, kForceInfershape, true); | |||||
auto node = graph->AddNode(op_desc); | |||||
std::unique_ptr<NodeItem> new_node; | |||||
NodeItem::Create(node, new_node); | |||||
GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph); | |||||
HybridModel hybrid_model(ge_root_model); | |||||
HybridModelBuilder hybrid_model_builder(hybrid_model); | |||||
ASSERT_EQ(hybrid_model_builder.ParseForceInfershapeNodes(node, *new_node), SUCCESS); | |||||
} | |||||
TEST_F(UtestGeHybrid, index_taskdefs_success) { | TEST_F(UtestGeHybrid, index_taskdefs_success) { | ||||
// build aicore task | // build aicore task | ||||
domi::ModelTaskDef model_task_def; | domi::ModelTaskDef model_task_def; | ||||
@@ -190,4 +207,39 @@ TEST_F(UtestGeHybrid, index_taskdefs_success) { | |||||
HybridModelBuilder hybrid_model_builder(hybrid_model); | HybridModelBuilder hybrid_model_builder(hybrid_model); | ||||
ASSERT_EQ(hybrid_model_builder.IndexTaskDefs(graph, ge_model), SUCCESS); | ASSERT_EQ(hybrid_model_builder.IndexTaskDefs(graph, ge_model), SUCCESS); | ||||
} | |||||
TEST_F(UtestGeHybrid, init_weight_success) { | |||||
NpuMemoryAllocator::allocators_.emplace(make_pair(0, nullptr)); | |||||
// make graph with sub_graph | |||||
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("root_graph"); | |||||
OpDescPtr op_desc = CreateOpDesc("if", IF); | |||||
NodePtr node = graph->AddNode(op_desc); | |||||
// make sub graph | |||||
ComputeGraphPtr sub_graph = std::make_shared<ComputeGraph>("if_sub_graph"); | |||||
OpDescPtr const_op_desc = CreateOpDesc("const", CONSTANT); | |||||
vector<int64_t> dims_vec_0 = {2, 1, 4, 1, 2}; | |||||
vector<int32_t> data_vec_0 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; | |||||
GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32); | |||||
(void)TensorUtils::SetRealDimCnt(tensor_desc_0, dims_vec_0.size()); | |||||
ConstGeTensorPtr constTensor_0 = | |||||
std::make_shared<GeTensor>(tensor_desc_0, (uint8_t *)&data_vec_0[0], data_vec_0.size() * sizeof(int32_t)); | |||||
AttrUtils::SetTensor(const_op_desc, ge::ATTR_NAME_WEIGHTS, constTensor_0); | |||||
const_op_desc->AddOutputDesc(tensor_desc_0); | |||||
NodePtr const_node = sub_graph->AddNode(const_op_desc); | |||||
graph->AddSubgraph("sub", sub_graph); | |||||
GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph); | |||||
GeModelPtr ge_sub_model = make_shared<GeModel>(); | |||||
//Buffer weight_buffer = Buffer(128,0); | |||||
//ge_sub_model->SetWeight(weight_buffer); | |||||
ge_root_model->SetSubgraphInstanceNameToModel("sub",ge_sub_model); | |||||
HybridModel hybrid_model(ge_root_model); | |||||
HybridModelBuilder hybrid_model_builder(hybrid_model); | |||||
auto ret = hybrid_model_builder.InitWeights(); | |||||
ASSERT_EQ(ret,SUCCESS); | |||||
Buffer weight_buffer = Buffer(128,0); | |||||
ge_sub_model->SetWeight(weight_buffer); | |||||
ret = hybrid_model_builder.InitWeights(); | |||||
ASSERT_EQ(ret,PARAM_INVALID); | |||||
} | } |