@@ -195,6 +195,7 @@ set(TRAIN_SRC_LIST | |||||
"graph/passes/atomic_addr_clean_pass.cc" | "graph/passes/atomic_addr_clean_pass.cc" | ||||
"graph/passes/mark_same_addr_pass.cc" | "graph/passes/mark_same_addr_pass.cc" | ||||
"graph/passes/mark_graph_unknown_status_pass.cc" | "graph/passes/mark_graph_unknown_status_pass.cc" | ||||
"graph/passes/mark_node_unknown_shape_pass.cc" | |||||
"graph/passes/mark_agnostic_pass.cc" | "graph/passes/mark_agnostic_pass.cc" | ||||
"graph/partition/dynamic_shape_partition.cc" | "graph/partition/dynamic_shape_partition.cc" | ||||
"graph/partition/stage_partition.cc" | "graph/partition/stage_partition.cc" | ||||
@@ -505,6 +506,7 @@ set(INFER_SRC_LIST | |||||
"graph/passes/atomic_addr_clean_pass.cc" | "graph/passes/atomic_addr_clean_pass.cc" | ||||
"graph/passes/mark_same_addr_pass.cc" | "graph/passes/mark_same_addr_pass.cc" | ||||
"graph/passes/mark_graph_unknown_status_pass.cc" | "graph/passes/mark_graph_unknown_status_pass.cc" | ||||
"graph/passes/mark_node_unknown_shape_pass.cc" | |||||
"graph/passes/mark_agnostic_pass.cc" | "graph/passes/mark_agnostic_pass.cc" | ||||
"graph/common/omg_util.cc" | "graph/common/omg_util.cc" | ||||
"graph/common/bcast.cc" | "graph/common/bcast.cc" | ||||
@@ -114,6 +114,7 @@ OMG_HOST_SRC_FILES := \ | |||||
graph/passes/atomic_addr_clean_pass.cc \ | graph/passes/atomic_addr_clean_pass.cc \ | ||||
graph/passes/mark_same_addr_pass.cc \ | graph/passes/mark_same_addr_pass.cc \ | ||||
graph/passes/mark_graph_unknown_status_pass.cc \ | graph/passes/mark_graph_unknown_status_pass.cc \ | ||||
graph/passes/mark_node_unknown_shape_pass.cc \ | |||||
graph/passes/mark_agnostic_pass.cc \ | graph/passes/mark_agnostic_pass.cc \ | ||||
graph/common/omg_util.cc \ | graph/common/omg_util.cc \ | ||||
graph/common/bcast.cc \ | graph/common/bcast.cc \ | ||||
@@ -114,6 +114,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
graph/passes/atomic_addr_clean_pass.cc \ | graph/passes/atomic_addr_clean_pass.cc \ | ||||
graph/passes/mark_same_addr_pass.cc \ | graph/passes/mark_same_addr_pass.cc \ | ||||
graph/passes/mark_graph_unknown_status_pass.cc \ | graph/passes/mark_graph_unknown_status_pass.cc \ | ||||
graph/passes/mark_node_unknown_shape_pass.cc \ | |||||
graph/passes/mark_agnostic_pass.cc \ | graph/passes/mark_agnostic_pass.cc \ | ||||
graph/partition/dynamic_shape_partition.cc \ | graph/partition/dynamic_shape_partition.cc \ | ||||
graph/partition/stage_partition.cc \ | graph/partition/stage_partition.cc \ | ||||
@@ -52,6 +52,7 @@ constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | |||||
const int64_t kDynamicDimValue = -2; | const int64_t kDynamicDimValue = -2; | ||||
const int kDefaultDeviceId = 0; | const int kDefaultDeviceId = 0; | ||||
const int kDefaultJobId = 0; | const int kDefaultJobId = 0; | ||||
const int32_t kFuzzBuildPattern = 1; | |||||
std::map<ge::OpEngineType, std::string> engine_type_map{ | std::map<ge::OpEngineType, std::string> engine_type_map{ | ||||
{ge::ENGINE_SYS, kEngineNameDefault}, | {ge::ENGINE_SYS, kEngineNameDefault}, | ||||
@@ -298,6 +299,38 @@ static Status ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTenso | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
// Collects the fuzz-build result attrs recorded on |op_desc| into
// |fuzz_build_attrs|, and strips the per-node ATTR_NAME_FUZZ_BUILD marker
// from every node of the built root graph (the build has finished, so the
// marker is no longer needed).
// Always returns SUCCESS (apart from null-check failures); an empty result
// list only produces a warning, since a kernel may legitimately not split.
static Status GetFuzzBuildAttrs(const OpDescPtr &op_desc, const GeRootModelPtr &ge_root_model,
                                GeAttrValue::LIST_NAMED_ATTRS &fuzz_build_attrs) {
  GELOGD("Start get fuzz build attrs of %s.", op_desc->GetName().c_str());
  GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
  // Clear the build-time marker from all nodes now that compilation is done.
  for (const auto &node : ge_root_model->GetRootGraph()->GetAllNodes()) {
    GE_CHECK_NOTNULL(node);
    GE_CHECK_NOTNULL(node->GetOpDesc());
    GELOGD("Delete fuzz build attr of %s after build.", node->GetName().c_str());
    node->GetOpDesc()->DelAttr(ATTR_NAME_FUZZ_BUILD);
  }
  (void)AttrUtils::GetListNamedAttrs(op_desc, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs);
  if (fuzz_build_attrs.empty()) {
    GELOGW("%s build with fuzz build pattern, but not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", op_desc->GetName().c_str());
    return SUCCESS;
  }
  GELOGD("%s has split, get ATTR_NAME_FUZZ_BUILD_RES_ATTRS directly.", op_desc->GetName().c_str());
  return SUCCESS;
}
static bool HasShapeRange(const vector<GeTensor> &inputs) { | |||||
for (const auto &input : inputs) { | |||||
vector<pair<int64_t, int64_t>> shape_range; | |||||
(void)input.GetTensorDesc().GetShapeRange(shape_range); | |||||
if (!shape_range.empty()) { | |||||
GELOGD("Has set shape range."); | |||||
return true; | |||||
} | |||||
} | |||||
return false; | |||||
} | |||||
class GeGenerator::Impl { | class GeGenerator::Impl { | ||||
public: | public: | ||||
Impl(OmgContext &omg_context) : omg_context_(omg_context) {} | Impl(OmgContext &omg_context) : omg_context_(omg_context) {} | ||||
@@ -747,7 +780,8 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> | |||||
Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | ||||
const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | ||||
bool is_offline) { | |||||
bool is_offline, int32_t compile_flag) { | |||||
GELOGD("Inputs size is %zu, outputs size is %zu.", inputs.size(), outputs.size()); | |||||
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | ||||
impl_->is_offline_ = is_offline; | impl_->is_offline_ = is_offline; | ||||
if (!is_offline) { | if (!is_offline) { | ||||
@@ -769,6 +803,16 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc); | OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc); | ||||
GE_CHECK_NOTNULL(op_desc_tmp); | GE_CHECK_NOTNULL(op_desc_tmp); | ||||
bool fuzz_compile_flag = false; | |||||
if (!HasShapeRange(inputs) && compile_flag == kFuzzBuildPattern) { | |||||
fuzz_compile_flag = true; | |||||
} | |||||
if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, fuzz_compile_flag)) { | |||||
GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD] Failed to set attr for %s.", op_desc->GetName().c_str()); | |||||
return FAILED; | |||||
} | |||||
impl_->omg_context_.fuzz_compile_flag = fuzz_compile_flag; | |||||
// 1. Create ComputeGraph. | // 1. Create ComputeGraph. | ||||
string name = ge::CurrentTimeInStr() + "_" + model_file_name; | string name = ge::CurrentTimeInStr() + "_" + model_file_name; | ||||
Graph graph; | Graph graph; | ||||
@@ -815,6 +859,19 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(outputs, outputs_dynamic)); | GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(outputs, outputs_dynamic)); | ||||
GE_CHK_STATUS_RET_NOLOG( | GE_CHK_STATUS_RET_NOLOG( | ||||
impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic)); | impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic)); | ||||
} else if (fuzz_compile_flag) { | |||||
GELOGD("Get fuzz build result of %s.", op_desc->GetName().c_str()); | |||||
(void)AttrUtils::SetInt(ge_model, ATTR_NAME_BUILD_MODE, fuzz_compile_flag); | |||||
GeAttrValue::LIST_NAMED_ATTRS fuzz_build_attrs; | |||||
if (GetFuzzBuildAttrs(op_desc, ge_root_model, fuzz_build_attrs) != SUCCESS) { | |||||
GELOGE(FAILED, "[Get][FuzzRet]Failed to get fuzz build result of %s.", op_desc->GetName().c_str()); | |||||
return FAILED; | |||||
} | |||||
if (!fuzz_build_attrs.empty()) { | |||||
GE_CHK_BOOL_EXEC(AttrUtils::SetListNamedAttrs(ge_model, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs), | |||||
return FAILED, "Set ATTR_NAME_FUZZ_BUILD_RES_ATTRS failed."); | |||||
} | |||||
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | |||||
} else { | } else { | ||||
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | ||||
} | } | ||||
@@ -830,15 +887,17 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
* @param [in] vector<GeTensor> &inputs: Operator input data description information. | * @param [in] vector<GeTensor> &inputs: Operator input data description information. | ||||
* @param [in] vector<GeTensor> &outputs: Operator output data description information. | * @param [in] vector<GeTensor> &outputs: Operator output data description information. | ||||
* @param [in] const string &model_file_name: Offline model filename. | * @param [in] const string &model_file_name: Offline model filename. | ||||
* @param [in] compile_flag: op build flag from atc | |||||
* @return SUCCESS handle successfully / others handle failed | * @return SUCCESS handle successfully / others handle failed | ||||
*/ | */ | ||||
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | ||||
const vector<GeTensor> &outputs, const string &model_file_name) { | |||||
const vector<GeTensor> &outputs, const string &model_file_name, | |||||
int32_t compile_flag) { | |||||
ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); | ||||
GELOGI("Start to build single op offline model, input size: %zu, output size: %zu", inputs.size(), outputs.size()); | GELOGI("Start to build single op offline model, input size: %zu, output size: %zu", inputs.size(), outputs.size()); | ||||
ModelBufferData model_buff; | ModelBufferData model_buff; | ||||
OpEngineType engine_type = ENGINE_SYS; | OpEngineType engine_type = ENGINE_SYS; | ||||
Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true); | |||||
Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true, compile_flag); | |||||
GELOGI("Finish build single offline model, status: %u", status); | GELOGI("Finish build single offline model, status: %u", status); | ||||
return status; | return status; | ||||
} | } | ||||
@@ -850,9 +909,11 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor | |||||
* @param [in] vector<GeTensor> &inputs: Operator input data description information. | * @param [in] vector<GeTensor> &inputs: Operator input data description information. | ||||
* @param [in] vector<GeTensor> &outputs: Operator output data description information. | * @param [in] vector<GeTensor> &outputs: Operator output data description information. | ||||
* @param [in] engine_type: specific engine. | * @param [in] engine_type: specific engine. | ||||
* @param [in] compile_flag: op build flag, compile flag by acl | |||||
* @param [out] ModelBufferData &Model_buff: Model_buff: model buffer of the op. | * @param [out] ModelBufferData &Model_buff: Model_buff: model buffer of the op. | ||||
* @return SUCCESS handle successfully / others handle failed | * @return SUCCESS handle successfully / others handle failed | ||||
*/ | */ | ||||
// old process will be deleted | |||||
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | ||||
const vector<GeTensor> &outputs, OpEngineType engine_type, | const vector<GeTensor> &outputs, OpEngineType engine_type, | ||||
ModelBufferData &model_buff) { | ModelBufferData &model_buff) { | ||||
@@ -863,6 +924,17 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor | |||||
return status; | return status; | ||||
} | } | ||||
// Builds a single-op model for online (in-memory) use.
// @param op_desc       operator description to build
// @param inputs        operator input tensor descriptions
// @param outputs       operator output tensor descriptions
// @param engine_type   specific engine to build with
// @param compile_flag  op build flag passed down from ACL (fuzz pattern etc.)
// @param model_buff    receives the built model buffer
// @return SUCCESS on success, otherwise an error status from BuildSingleOp
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
                                       const vector<GeTensor> &outputs, OpEngineType engine_type, int32_t compile_flag,
                                       ModelBufferData &model_buff) {
  ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther);
  GELOGI("Start to build single op online, input size: %zu, output size: %zu", inputs.size(), outputs.size());
  // Online build: no model file on disk (kFileNameSuffix), is_offline = false.
  const Status build_ret = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false,
                                         compile_flag);
  GELOGI("Finish build single online model, status: %u", build_ret);
  return build_ret;
}
Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs, | ||||
const vector<GeTensor> &outputs, std::string graph_name, Graph &graph) { | const vector<GeTensor> &outputs, std::string graph_name, Graph &graph) { | ||||
ge::ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(graph_name); | ge::ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(graph_name); | ||||
@@ -60,6 +60,7 @@ | |||||
#include "graph/passes/iterator_op_pass.h" | #include "graph/passes/iterator_op_pass.h" | ||||
#include "graph/passes/link_gen_mask_nodes_pass.h" | #include "graph/passes/link_gen_mask_nodes_pass.h" | ||||
#include "graph/passes/mark_graph_unknown_status_pass.h" | #include "graph/passes/mark_graph_unknown_status_pass.h" | ||||
#include "graph/passes/mark_node_unknown_shape_pass.h" | |||||
#include "graph/passes/merge_pass.h" | #include "graph/passes/merge_pass.h" | ||||
#include "graph/passes/merge_input_memcpy_pass.h" | #include "graph/passes/merge_input_memcpy_pass.h" | ||||
#include "graph/passes/merge_to_stream_merge_pass.h" | #include "graph/passes/merge_to_stream_merge_pass.h" | ||||
@@ -941,6 +942,8 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||||
} | } | ||||
ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kPrepareOptimize); | ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kPrepareOptimize); | ||||
// set fuzz compile flag after origin graph optimize | |||||
GE_CHK_STATUS_RET(SetFuzzCompileFlag(compute_graph), "Set fuzz compile flag failed."); | |||||
ret = PreRunOptimizeSubGraph(graph_node, compute_graph, session_id); | ret = PreRunOptimizeSubGraph(graph_node, compute_graph, session_id); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Run PreRunOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str()); | GELOGE(ret, "Run PreRunOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str()); | ||||
@@ -955,7 +958,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||||
options_.build_step == BUILD_STEP_AFTER_BUILDER || | options_.build_step == BUILD_STEP_AFTER_BUILDER || | ||||
options_.build_step == BUILD_STEP_AFTER_BUILDER_SUB)); | options_.build_step == BUILD_STEP_AFTER_BUILDER_SUB)); | ||||
if (run_after_optimize_subgraph) { | if (run_after_optimize_subgraph) { | ||||
Status ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id); | |||||
ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Run PreRunAfterOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str()); | GELOGE(ret, "Run PreRunAfterOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str()); | ||||
return ret; | return ret; | ||||
@@ -973,6 +976,22 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
// Stamps ATTR_NAME_FUZZ_BUILD on every node of |compute_graph| when the
// current OMG context was compiled with the fuzz-build pattern, so later
// passes (e.g. MarkNodeUnknownShapePass) can recognize fuzz-built nodes.
// No-op (returns SUCCESS) when the fuzz compile flag is not set.
// @param compute_graph graph whose nodes receive the attribute
// @return SUCCESS, or FAILED if the attribute cannot be set on some node
Status GraphManager::SetFuzzCompileFlag(ComputeGraphPtr &compute_graph) {
  if (!GetLocalOmgContext().fuzz_compile_flag) {
    return SUCCESS;
  }
  // The flag is context-wide and invariant across nodes: log it once here
  // instead of once per node inside the loop.
  GELOGD("Fuzz compile flag is %d.", GetLocalOmgContext().fuzz_compile_flag);
  for (const auto &node : compute_graph->GetAllNodes()) {
    OpDescPtr op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, GetLocalOmgContext().fuzz_compile_flag)) {
      GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD]Failed to set fuzz build attr to %s.", op_desc->GetName().c_str());
      return FAILED;
    }
  }
  return SUCCESS;
}
Status GraphManager::SubexpressionMigration(ComputeGraphPtr &compute_graph) { | Status GraphManager::SubexpressionMigration(ComputeGraphPtr &compute_graph) { | ||||
PassManager pass_manager; | PassManager pass_manager; | ||||
GE_CHK_STATUS_RET(pass_manager.AddPass("SubexpressionMigrationPass", new (std::nothrow) SubexpressionMigrationPass)); | GE_CHK_STATUS_RET(pass_manager.AddPass("SubexpressionMigrationPass", new (std::nothrow) SubexpressionMigrationPass)); | ||||
@@ -2446,6 +2465,8 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||||
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::CompileNodesPass", | GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::CompileNodesPass", | ||||
new (std::nothrow) CompileNodesPass)) | new (std::nothrow) CompileNodesPass)) | ||||
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass( | GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass( | ||||
"OptimizeStage2::AfterMergePasses::MarkNodeUnknownShapePass", new(std::nothrow) MarkNodeUnknownShapePass)) | |||||
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass( | |||||
"OptimizeStage2::AfterMergePasses::MarkGraphUnknownStatusPass", new(std::nothrow) MarkGraphUnknownStatusPass)) | "OptimizeStage2::AfterMergePasses::MarkGraphUnknownStatusPass", new(std::nothrow) MarkGraphUnknownStatusPass)) | ||||
GE_CHK_STATUS_RET( | GE_CHK_STATUS_RET( | ||||
pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::InputOutputConnectionIdentifyPass", | pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::InputOutputConnectionIdentifyPass", | ||||
@@ -372,6 +372,7 @@ class GraphManager { | |||||
ComputeGraphPtr &compute_graph, | ComputeGraphPtr &compute_graph, | ||||
GeRootModelPtr &ge_root_model, | GeRootModelPtr &ge_root_model, | ||||
uint64_t session_id); | uint64_t session_id); | ||||
Status SetFuzzCompileFlag(ComputeGraphPtr &compute_graph); | |||||
Status CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_graph, | Status CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_graph, | ||||
Graph2SubGraphInfoList &sub_graph_map, | Graph2SubGraphInfoList &sub_graph_map, | ||||
@@ -0,0 +1,99 @@ | |||||
/** | |||||
* Copyright 2021 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include "graph/passes/mark_node_unknown_shape_pass.h" | |||||
#include "graph/utils/node_utils.h" | |||||
#include "graph/debug/ge_attr_define.h" | |||||
#include "graph/common/local_context.h" | |||||
namespace ge { | |||||
namespace { | |||||
const char *const kEngineNameAiCore = "AIcoreEngine"; | |||||
const char *const kNeedRefreshShape = "_need_generate"; | |||||
const char *const kOriginalNode = "_original_node"; | |||||
const int32_t kDynamicState = -2; | |||||
} | |||||
// Pass entry point. Only active for fuzz-compiled graphs: when every AICore
// node reports fuzz-build result attrs (i.e. all AICore kernels support
// dynamic shape), rewrite node shapes in the graph to unknown rank.
Status MarkNodeUnknownShapePass::Run(ComputeGraphPtr graph) {
  GE_CHECK_NOTNULL(graph);
  if (!GetLocalOmgContext().fuzz_compile_flag) {
    return SUCCESS;  // Not a fuzz build: nothing to mark.
  }
  if (!IsAllAicoreSupportDyn(graph)) {
    return SUCCESS;  // Some AICore kernel is static-shape only: leave shapes as-is.
  }
  if (UpdateNodeShapeToUnknown(graph) != SUCCESS) {
    GELOGE(FAILED, "[Update][Node_Shape]Failed to update node shape to unknown.");
    return FAILED;
  }
  return SUCCESS;
}
// Checks whether every AICore node in |graph| produced fuzz-build result
// attrs (ATTR_NAME_FUZZ_BUILD_RES_ATTRS), i.e. whether all AICore kernels
// can handle dynamic shapes. Nodes without an OpDesc and nodes whose kernel
// lib is not AIcoreEngine are ignored.
// Returns false when any AICore node lacks the attr — and also when the
// graph contains no AICore node at all (the flag is never set to true).
bool MarkNodeUnknownShapePass::IsAllAicoreSupportDyn(ComputeGraphPtr &graph) {
  bool is_all_aicore_support_dyn = false;
  for (const auto &node : graph->GetAllNodes()) {
    if (node->GetOpDesc() == nullptr) {
      continue;
    }
    if (node->GetOpDesc()->GetOpKernelLibName() != kEngineNameAiCore) {
      GELOGD("Kernel of %s is %s.", node->GetName().c_str(), node->GetOpDesc()->GetOpKernelLibName().c_str());
      continue;
    }
    // A fused/generated node may keep a pointer to its original node as an
    // ext attr; in that case the original must not still be flagged as
    // needing shape regeneration (kNeedRefreshShape) for this node to count.
    NodePtr original_node = nullptr;
    original_node = node->GetOpDesc()->TryGetExtAttr(kOriginalNode, original_node);
    if ((original_node == nullptr && AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS)) ||
        (original_node != nullptr && AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS) &&
         !AttrUtils::HasAttr(original_node->GetOpDesc(), kNeedRefreshShape))) {
      GELOGD("%s has set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str());
      is_all_aicore_support_dyn = true;
    } else {
      // One unsupported AICore node disqualifies the whole graph.
      GELOGD("%s has not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str());
      is_all_aicore_support_dyn = false;
      break;
    }
  }
  return is_all_aicore_support_dyn;
}
// Rewrites the input/output tensor shapes of every non-Const, non-Variable
// node in |graph| to the unknown-rank sentinel {-2} (kDynamicState), so the
// graph is subsequently treated as dynamic-shape.
// Inputs whose producer is a Const/Variable node keep their static shapes,
// since those values are fixed at build time.
Status MarkNodeUnknownShapePass::UpdateNodeShapeToUnknown(ComputeGraphPtr &graph) {
  GELOGD("Need to update node shape to dynamic when get fuzz build result.");
  for (const auto &node : graph->GetAllNodes()) {
    // Const/Variable nodes themselves always stay static.
    if (NodeUtils::IsConst(*node) || node->GetType() == VARIABLE) {
      continue;
    }
    auto op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
      // Skip inputs fed directly by a Const/Variable producer.
      auto src_node = NodeUtils::GetInDataNodeByIndex(*node, static_cast<int>(i));
      if (src_node != nullptr && (NodeUtils::IsConst(*src_node) || src_node->GetType() == VARIABLE)) {
        continue;
      }
      GELOGD("Update input shape for %s.", node->GetName().c_str());
      // MutableInputDesc may return null for optional/absent inputs; ignore those.
      auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
      if (input_desc != nullptr) {
        input_desc->SetShape(GeShape({kDynamicState}));
      }
    }
    for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) {
      if (output_desc != nullptr) {
        GELOGD("Update output shape for %s.", node->GetName().c_str());
        output_desc->SetShape(GeShape({kDynamicState}));
      }
    }
  }
  return SUCCESS;
}
} // namespace ge |
@@ -0,0 +1,32 @@ | |||||
/** | |||||
* Copyright 2021 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_ | |||||
#define GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_ | |||||
#include "graph/graph.h" | |||||
#include "inc/graph_pass.h" | |||||
namespace ge { | |||||
class MarkNodeUnknownShapePass : public GraphPass { | |||||
public: | |||||
Status Run(ComputeGraphPtr graph); | |||||
private: | |||||
bool IsAllAicoreSupportDyn(ComputeGraphPtr &graph); | |||||
Status UpdateNodeShapeToUnknown(ComputeGraphPtr &graph); | |||||
}; | |||||
} // namespace ge | |||||
#endif // GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_ |
@@ -55,9 +55,17 @@ Status InsertReshapeIfNeed(const NodePtr &node) { | |||||
GE_CHECK_NOTNULL(dst_node->GetOpDesc()); | GE_CHECK_NOTNULL(dst_node->GetOpDesc()); | ||||
auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx()); | auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx()); | ||||
GE_CHECK_NOTNULL(dst_tensor); | GE_CHECK_NOTNULL(dst_tensor); | ||||
bool is_need_insert_reshape = src_tensor->GetShape().GetDims() != UNKNOWN_RANK && | |||||
dst_tensor->GetShape().GetDims() != UNKNOWN_RANK && | |||||
src_tensor->GetShape().GetDims() != dst_tensor->GetShape().GetDims(); | |||||
bool is_dynamic = false; | |||||
const auto &src_tensor_dims = src_tensor->GetShape().GetDims(); | |||||
const auto &dst_tensor_dims = dst_tensor->GetShape().GetDims(); | |||||
if ((std::any_of(src_tensor_dims.begin(), src_tensor_dims.end(), [](int64_t val) { return val < 0 ; })) | |||||
|| (std::any_of(dst_tensor_dims.begin(), dst_tensor_dims.end(), [](int64_t val) { return val < 0; }))) { | |||||
GELOGD("No need to insert reshape node between %s nad %s.", node->GetName().c_str(), | |||||
dst_node->GetName().c_str()); | |||||
is_dynamic = true; | |||||
} | |||||
bool is_need_insert_reshape = src_tensor_dims != dst_tensor_dims && | |||||
!is_dynamic; | |||||
if (is_need_insert_reshape) { | if (is_need_insert_reshape) { | ||||
auto reshape = CreateReshape(src_tensor, dst_tensor, node->GetOwnerComputeGraph()); | auto reshape = CreateReshape(src_tensor, dst_tensor, node->GetOwnerComputeGraph()); | ||||
GE_CHECK_NOTNULL(reshape); | GE_CHECK_NOTNULL(reshape); | ||||
@@ -79,8 +79,10 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, | |||||
"Failed to execute partitioned call."); | "Failed to execute partitioned call."); | ||||
RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); | ||||
HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); | |||||
RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); | |||||
if (!model_->IsSingleOp()) { | |||||
HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); | |||||
RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); | |||||
} | |||||
args.outputs.clear(); | args.outputs.clear(); | ||||
HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); | HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); | ||||
@@ -168,7 +168,7 @@ Status NodeItem::InitInputsAndOutputs() { | |||||
Status NodeItem::ResolveDynamicState() { | Status NodeItem::ResolveDynamicState() { | ||||
(void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); | (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); | ||||
GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic); | |||||
GELOGD("Node name is %s, dynamic state is %d.", this->node_name.c_str(), is_dynamic); | |||||
if (!is_dynamic) { | if (!is_dynamic) { | ||||
GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic), | GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic), | ||||
"[%s] Failed to get shape status.", | "[%s] Failed to get shape status.", | ||||
@@ -22,6 +22,7 @@ | |||||
#include "hybrid/node_executor/aicore/aicore_task_builder.h" | #include "hybrid/node_executor/aicore/aicore_task_builder.h" | ||||
#include "graph/load/model_manager/tbe_handle_store.h" | #include "graph/load/model_manager/tbe_handle_store.h" | ||||
#include "graph/types.h" | #include "graph/types.h" | ||||
#include "single_op/task/build_task_utils.h" | |||||
using optiling::OpRunInfo; | using optiling::OpRunInfo; | ||||
@@ -31,6 +32,7 @@ namespace { | |||||
constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | ||||
constexpr char const *kAttrOpParamSize = "op_para_size"; | constexpr char const *kAttrOpParamSize = "op_para_size"; | ||||
constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | ||||
std::atomic<std::uint64_t> log_id(0); | |||||
} // namespace | } // namespace | ||||
TbeHandleHolder::TbeHandleHolder(void *bin_handle) | TbeHandleHolder::TbeHandleHolder(void *bin_handle) | ||||
@@ -48,6 +50,12 @@ bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) { | |||||
} | } | ||||
Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | ||||
log_name_ = op_desc.GetName() + "_tvmbin"; | |||||
log_id_ = log_id++; | |||||
auto op_desc_ptr = MakeShared<OpDesc>(op_desc); | |||||
GE_CHECK_NOTNULL(op_desc_ptr); | |||||
auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_ptr); | |||||
GELOGI("[TASK_INFO] %lu/%s %s.", log_id_, log_name_.c_str(), task_info.c_str()); | |||||
GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); | GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); | ||||
GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); | GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); | ||||
@@ -191,18 +199,18 @@ Status AiCoreOpTask::InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDe | |||||
} | } | ||||
const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | ||||
uint32_t offset = *args_offset_buffer; | |||||
if (offset > args_size_) { | |||||
offset_ = *args_offset_buffer; | |||||
if (offset_ > args_size_) { | |||||
GELOGE(INTERNAL_ERROR, | GELOGE(INTERNAL_ERROR, | ||||
"[%s] Arg offset out of range. offset = %u, arg size = %u", | "[%s] Arg offset out of range. offset = %u, arg size = %u", | ||||
GetName().c_str(), | GetName().c_str(), | ||||
offset, | |||||
offset_, | |||||
args_size_); | args_size_); | ||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset); | |||||
max_arg_count_ = (args_size_ - offset) / sizeof(void *); | |||||
arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset_); | |||||
max_arg_count_ = (args_size_ - offset_) / sizeof(void *); | |||||
GELOGD("[%s] Done setting kernel args successfully. stub_func = %s, block_dim = %d, arg base = %p, arg size = %u", | GELOGD("[%s] Done setting kernel args successfully. stub_func = %s, block_dim = %d, arg base = %p, arg size = %u", | ||||
op_desc.GetName().c_str(), | op_desc.GetName().c_str(), | ||||
stub_name_.c_str(), | stub_name_.c_str(), | ||||
@@ -241,18 +249,18 @@ Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const do | |||||
} | } | ||||
const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | ||||
uint32_t offset = *args_offset_buffer; | |||||
if (offset > args_size_) { | |||||
offset_ = *args_offset_buffer; | |||||
if (offset_ > args_size_) { | |||||
GELOGE(INTERNAL_ERROR, | GELOGE(INTERNAL_ERROR, | ||||
"[%s] Arg offset out of range. offset = %u, arg size = %u", | "[%s] Arg offset out of range. offset = %u, arg size = %u", | ||||
GetName().c_str(), | GetName().c_str(), | ||||
offset, | |||||
offset_, | |||||
args_size_); | args_size_); | ||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset); | |||||
max_arg_count_ = (args_size_ - offset) / sizeof(void *); | |||||
arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset_); | |||||
max_arg_count_ = (args_size_ - offset_) / sizeof(void *); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -360,12 +368,20 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { | |||||
++expected_arg_count; | ++expected_arg_count; | ||||
} | } | ||||
if (expected_arg_count > max_arg_count_) { | if (expected_arg_count > max_arg_count_) { | ||||
GELOGE(INTERNAL_ERROR, | |||||
"[%s] Invalid arg memory, max arg count = %u, but expect = %zu", | |||||
GetName().c_str(), | |||||
max_arg_count_, | |||||
expected_arg_count); | |||||
return INTERNAL_ERROR; | |||||
GELOGD("Need to reset size of args_ from %u to %zu.", max_arg_count_, expected_arg_count); | |||||
auto length = expected_arg_count * sizeof(uintptr_t) + offset_; | |||||
std::unique_ptr<uint8_t[]> new_args(new(std::nothrow) uint8_t[length]); | |||||
GE_CHECK_NOTNULL(new_args); | |||||
if (memcpy_s(new_args.get(), length, args_.get(), offset_) != EOK) { | |||||
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][new_args]failed, dst length is %zu, src length is %u.", | |||||
length, offset_); | |||||
REPORT_INNER_ERROR("E19999", "update kernel args failed of %s.", task_context.GetNodeName()); | |||||
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||||
} | |||||
args_ = std::move(new_args); | |||||
max_arg_count_ = static_cast<uint32_t>(expected_arg_count); | |||||
args_size_ = static_cast<uint32_t>(length); | |||||
arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset_); | |||||
} | } | ||||
int index = 0; | int index = 0; | ||||
@@ -421,6 +437,7 @@ Status AiCoreOpTask::LaunchKernel(rtStream_t stream) { | |||||
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream)); | GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream)); | ||||
GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); | GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); | ||||
} | } | ||||
GELOGI("[TASK_INFO] %lu/%s", log_id_, log_name_.c_str()); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -114,6 +114,9 @@ class AiCoreOpTask { | |||||
uint32_t tiling_key_ = 0; | uint32_t tiling_key_ = 0; | ||||
void *handle_ = nullptr; | void *handle_ = nullptr; | ||||
bool is_dynamic_ = false; | bool is_dynamic_ = false; | ||||
uint64_t log_id_ = 0; | |||||
std::string log_name_; | |||||
uint32_t offset_ = 0; | |||||
}; | }; | ||||
class AtomicAddrCleanOpTask : public AiCoreOpTask { | class AtomicAddrCleanOpTask : public AiCoreOpTask { | ||||
@@ -216,6 +216,10 @@ DEFINE_string(op_bank_path, "", "Optional; op bank path"); | |||||
DEFINE_string(display_model_info, "0", "Optional; display model info"); | DEFINE_string(display_model_info, "0", "Optional; display model info"); | ||||
DEFINE_string(performance_mode, "", "Optional; express high compile performance or high execute performance." | |||||
"normal: no need to compile, used saved .o files directly;" | |||||
"high: need to recompile, high execute performance mode."); | |||||
class GFlagUtils { | class GFlagUtils { | ||||
public: | public: | ||||
/** | /** | ||||
@@ -328,7 +332,8 @@ class GFlagUtils { | |||||
"Default value: $HOME/atc_data\n" | "Default value: $HOME/atc_data\n" | ||||
" --op_compiler_cache_mode Set the operator compilation cache mode." | " --op_compiler_cache_mode Set the operator compilation cache mode." | ||||
"Options are disable(default), enable and force(force to refresh the cache)\n" | "Options are disable(default), enable and force(force to refresh the cache)\n" | ||||
" --display_model_info enable for display model info; 0(default): close display, 1: open display"); | |||||
" --display_model_info enable for display model info; 0(default): close display, 1: open display.\n" | |||||
" --performance_mode Set high performance mode of compile or execute."); | |||||
gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); | gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); | ||||
// Using gflags to analyze input parameters | // Using gflags to analyze input parameters | ||||
@@ -1073,6 +1078,7 @@ static void SetEnvForSingleOp(std::map<string, string> &options) { | |||||
options.emplace(ge::OP_COMPILER_CACHE_MODE, FLAGS_op_compiler_cache_mode); | options.emplace(ge::OP_COMPILER_CACHE_MODE, FLAGS_op_compiler_cache_mode); | ||||
options.emplace(ge::MDL_BANK_PATH_FLAG, FLAGS_mdl_bank_path); | options.emplace(ge::MDL_BANK_PATH_FLAG, FLAGS_mdl_bank_path); | ||||
options.emplace(ge::OP_BANK_PATH_FLAG, FLAGS_op_bank_path); | options.emplace(ge::OP_BANK_PATH_FLAG, FLAGS_op_bank_path); | ||||
options.emplace(ge::PERFORMANCE_MODE, FLAGS_performance_mode); | |||||
} | } | ||||
domi::Status GenerateSingleOp(const std::string& json_file_path) { | domi::Status GenerateSingleOp(const std::string& json_file_path) { | ||||
@@ -1119,7 +1125,7 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) { | |||||
output_path = FLAGS_output + "/"; | output_path = FLAGS_output + "/"; | ||||
} | } | ||||
output_path += param.file_name; | output_path += param.file_name; | ||||
ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path); | |||||
ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path, param.compile_flag); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
DOMI_LOGE("Compile op failed. ge ret = %u, op index = %d", ret, index); | DOMI_LOGE("Compile op failed. ge ret = %u, op index = %d", ret, index); | ||||
ret = domi::FAILED; | ret = domi::FAILED; | ||||
@@ -1224,6 +1230,8 @@ domi::Status GenerateOmModel() { | |||||
options.insert(std::pair<string, string>(string(ge::OP_BANK_PATH_FLAG), FLAGS_op_bank_path)); | options.insert(std::pair<string, string>(string(ge::OP_BANK_PATH_FLAG), FLAGS_op_bank_path)); | ||||
options.insert(std::pair<string, string>(string(ge::DISPLAY_MODEL_INFO), FLAGS_display_model_info)); | options.insert(std::pair<string, string>(string(ge::DISPLAY_MODEL_INFO), FLAGS_display_model_info)); | ||||
options.insert(std::pair<string, string>(string(ge::PERFORMANCE_MODE), FLAGS_performance_mode)); | |||||
// set enable scope fusion passes | // set enable scope fusion passes | ||||
SetEnableScopeFusionPasses(FLAGS_enable_scope_fusion_passes); | SetEnableScopeFusionPasses(FLAGS_enable_scope_fusion_passes); | ||||
// print atc option map | // print atc option map | ||||
@@ -53,6 +53,7 @@ constexpr char const *kKeyOriginFormat = "origin_format"; | |||||
constexpr char const *kFileSuffix = ".om"; | constexpr char const *kFileSuffix = ".om"; | ||||
constexpr char const *kKeyDynamicInput = "dynamic_input"; | constexpr char const *kKeyDynamicInput = "dynamic_input"; | ||||
constexpr char const *kKeyDynamicOutput = "dynamic_output"; | constexpr char const *kKeyDynamicOutput = "dynamic_output"; | ||||
constexpr char const *kKeyCompileFlag = "compile_flag"; | |||||
constexpr int kDumpJsonIndent = 2; | constexpr int kDumpJsonIndent = 2; | ||||
constexpr int kShapeRangePairSize = 2; | constexpr int kShapeRangePairSize = 2; | ||||
constexpr int kShapeRangeLow = 0; | constexpr int kShapeRangeLow = 0; | ||||
@@ -259,7 +260,10 @@ void from_json(const Json &j, SingleOpAttr &attr) { | |||||
} | } | ||||
void from_json(const Json &j, SingleOpDesc &desc) { | void from_json(const Json &j, SingleOpDesc &desc) { | ||||
desc.op = j.at(kKeyOp).get<string>(); | |||||
auto op = j.find(kKeyOp); | |||||
if (op != j.end()) { | |||||
desc.op = j.at(kKeyOp).get<string>(); | |||||
} | |||||
auto input_desc = j.find(kKeyInputDesc); | auto input_desc = j.find(kKeyInputDesc); | ||||
if (input_desc != j.end()) { | if (input_desc != j.end()) { | ||||
@@ -275,6 +279,11 @@ void from_json(const Json &j, SingleOpDesc &desc) { | |||||
if (attr_field != j.end()) { | if (attr_field != j.end()) { | ||||
desc.attrs = attr_field->get<vector<SingleOpAttr>>(); | desc.attrs = attr_field->get<vector<SingleOpAttr>>(); | ||||
} | } | ||||
auto compile_flag = j.find(kKeyCompileFlag); | |||||
if (compile_flag != j.end()) { | |||||
desc.compile_flag = compile_flag->get<int32_t>(); | |||||
} | |||||
} | } | ||||
Status SingleOpParser::ReadJsonFile(const std::string &file, Json &json_obj) { | Status SingleOpParser::ReadJsonFile(const std::string &file, Json &json_obj) { | ||||
@@ -572,10 +581,16 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si | |||||
return ret; | return ret; | ||||
} | } | ||||
int32_t compile_flag = 0; | |||||
for (const Json &single_op_json : single_op_list_json) { | for (const Json &single_op_json : single_op_list_json) { | ||||
SingleOpDesc single_op_desc; | SingleOpDesc single_op_desc; | ||||
GELOGI("Parsing op[%d], jsonStr = %s", index, single_op_json.dump(kDumpJsonIndent).c_str()); | GELOGI("Parsing op[%d], jsonStr = %s", index, single_op_json.dump(kDumpJsonIndent).c_str()); | ||||
single_op_desc = single_op_json; | single_op_desc = single_op_json; | ||||
GELOGD("Compile flag is %d.", single_op_desc.compile_flag); | |||||
if (single_op_desc.compile_flag == 1) { | |||||
compile_flag = single_op_desc.compile_flag; | |||||
continue; | |||||
} | |||||
if (UpdateDynamicTensorName(single_op_desc.input_desc) != SUCCESS) { | if (UpdateDynamicTensorName(single_op_desc.input_desc) != SUCCESS) { | ||||
GELOGE(FAILED, "Update dynamic tensor name failed!"); | GELOGE(FAILED, "Update dynamic tensor name failed!"); | ||||
return FAILED; | return FAILED; | ||||
@@ -591,6 +606,7 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return ret; | return ret; | ||||
} | } | ||||
param.compile_flag = compile_flag; | |||||
op_list.emplace_back(param); | op_list.emplace_back(param); | ||||
GELOGI("Parse the index[%d] of op success", index); | GELOGI("Parse the index[%d] of op success", index); | ||||
@@ -55,6 +55,7 @@ struct SingleOpDesc { | |||||
std::vector<SingleOpTensorDesc> input_desc; | std::vector<SingleOpTensorDesc> input_desc; | ||||
std::vector<SingleOpTensorDesc> output_desc; | std::vector<SingleOpTensorDesc> output_desc; | ||||
std::vector<SingleOpAttr> attrs; | std::vector<SingleOpAttr> attrs; | ||||
int32_t compile_flag = 0; | |||||
}; | }; | ||||
struct SingleOpBuildParam { | struct SingleOpBuildParam { | ||||
@@ -62,6 +63,7 @@ struct SingleOpBuildParam { | |||||
std::vector<ge::GeTensor> inputs; | std::vector<ge::GeTensor> inputs; | ||||
std::vector<ge::GeTensor> outputs; | std::vector<ge::GeTensor> outputs; | ||||
std::string file_name; | std::string file_name; | ||||
int32_t compile_flag = 0; | |||||
}; | }; | ||||
void from_json(const nlohmann::json &json, SingleOpTensorDesc &desc); | void from_json(const nlohmann::json &json, SingleOpTensorDesc &desc); | ||||
@@ -34,6 +34,9 @@ const size_t kDataMemAlignSize = 32; | |||||
const size_t kDataMemAlignUnit = 2; | const size_t kDataMemAlignUnit = 2; | ||||
const string kShapeTypeDynamic = "dynamic"; | const string kShapeTypeDynamic = "dynamic"; | ||||
const string kShapeTypeStatic = "static"; | const string kShapeTypeStatic = "static"; | ||||
const int64_t kHostMemType = 1; | |||||
const uint32_t kFuzzDeviceBufferSize = 1 * 1024 * 1024; | |||||
const uint32_t kAlignBytes = 512; | |||||
size_t GetAlignedSize(size_t size) { | size_t GetAlignedSize(size_t size) { | ||||
size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; | size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; | ||||
@@ -65,6 +68,68 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { | |||||
profiling_manager.ReportProfilingData(model_id, task_desc_info); | profiling_manager.ReportProfilingData(model_id, task_desc_info); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status CalInputsHostMemSize(const std::vector<DataBuffer> &inputs, | |||||
std::vector<std::pair<size_t, uint64_t>> &inputs_size) { | |||||
int64_t total_size = 0; | |||||
size_t index = 0; | |||||
for (auto &input_buffer : inputs) { | |||||
int64_t input_size = 0; | |||||
if (input_buffer.placement == kHostMemType) { | |||||
GE_CHECK_LE(input_buffer.length, INT64_MAX); | |||||
input_size = input_buffer.length; | |||||
// input_size pad to 512 | |||||
GE_CHK_STATUS_RET(CheckInt64AddOverflow(input_size, (kAlignBytes - 1)), "Padding size is beyond the INT64_MAX."); | |||||
input_size = ((input_size + kAlignBytes - 1) / kAlignBytes) * kAlignBytes; | |||||
inputs_size.emplace_back(index, input_size); | |||||
GE_CHK_STATUS_RET(CheckInt64AddOverflow(total_size, input_size), "Total size is beyond the INT64_MAX."); | |||||
total_size += input_size; | |||||
GELOGD("The %zu input mem type is host, the tensor size is %ld.", index, input_size); | |||||
} | |||||
index++; | |||||
} | |||||
if (total_size > kFuzzDeviceBufferSize) { | |||||
GELOGE(FAILED, "[Check][Size]Total size is %ld, larger than 1M.", total_size); | |||||
return FAILED; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status UpdateInputsBufferAddr(StreamResource *stream_resource, rtStream_t stream, | |||||
const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | |||||
std::vector<DataBuffer> &update_buffers) { | |||||
GE_CHECK_NOTNULL(stream_resource); | |||||
auto dst_addr = reinterpret_cast<uint8_t *>(stream_resource->GetDeviceBufferAddr()); | |||||
// copy host mem from input_buffer to device mem of dst_addr | |||||
for (const auto &input_size : inputs_size) { | |||||
auto index = input_size.first; | |||||
auto size = input_size.second; | |||||
GELOGD("Do h2d for %zu input, dst size is %zu, src length is %lu.", index, size, update_buffers[index].length); | |||||
GE_CHK_RT_RET(rtMemcpyAsync(dst_addr, size, update_buffers[index].data, update_buffers[index].length, | |||||
RT_MEMCPY_HOST_TO_DEVICE_EX, stream)); | |||||
update_buffers[index].data = dst_addr; | |||||
dst_addr = dst_addr + size; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status InitHybridModelArgs(const std::vector<DataBuffer> &input_buffers, | |||||
const std::vector<DataBuffer> &output_buffers, | |||||
const std::vector<GeTensorDesc> &inputs_desc, | |||||
hybrid::HybridModelExecutor::ExecuteArgs &args) { | |||||
for (auto &input : input_buffers) { | |||||
args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length)); | |||||
} | |||||
for (auto &output : output_buffers) { | |||||
args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length)); | |||||
} | |||||
for (auto &tensor_desc : inputs_desc) { | |||||
auto desc = MakeShared<GeTensorDesc>(tensor_desc); | |||||
GE_CHECK_NOTNULL(desc); | |||||
args.input_desc.emplace_back(desc); | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
} // namespace | } // namespace | ||||
SingleOp::SingleOp(StreamResource *stream_resource, std::mutex *stream_mutex, rtStream_t stream) | SingleOp::SingleOp(StreamResource *stream_resource, std::mutex *stream_mutex, rtStream_t stream) | ||||
@@ -155,13 +220,28 @@ Status SingleOp::UpdateArgs(const std::vector<DataBuffer> &inputs, const std::ve | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(const std::vector<DataBuffer> &inputs, | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(const std::vector<DataBuffer> &inputs, | ||||
const std::vector<DataBuffer> &outputs) { | const std::vector<DataBuffer> &outputs) { | ||||
GELOGD("Start SingleOp::ExecuteAsync."); | |||||
Status ret = ValidateArgs(inputs, outputs); | Status ret = ValidateArgs(inputs, outputs); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return ret; | return ret; | ||||
} | } | ||||
GE_CHECK_NOTNULL(stream_resource_); | GE_CHECK_NOTNULL(stream_resource_); | ||||
vector<pair<size_t, uint64_t>> inputs_size; | |||||
GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(inputs, inputs_size)); | |||||
std::lock_guard<std::mutex> lk(*stream_mutex_); | std::lock_guard<std::mutex> lk(*stream_mutex_); | ||||
vector<DataBuffer> update_buffers = inputs; | |||||
if (!inputs_size.empty()) { | |||||
GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource_, stream_, inputs_size, update_buffers)); | |||||
} | |||||
if (hybrid_model_executor_ != nullptr) { | |||||
GELOGD("Execute multi-task single op by hybrid model executor"); | |||||
hybrid::HybridModelExecutor::ExecuteArgs args; | |||||
GE_CHK_STATUS_RET_NOLOG(InitHybridModelArgs(update_buffers, outputs, inputs_desc_, args)); | |||||
return hybrid_model_executor_->Execute(args); | |||||
} | |||||
auto current_mem_base = stream_resource_->GetMemoryBase(); | auto current_mem_base = stream_resource_->GetMemoryBase(); | ||||
if (running_param_->mem_base != current_mem_base) { | if (running_param_->mem_base != current_mem_base) { | ||||
running_param_->mem_base = const_cast<uint8_t *>(current_mem_base); | running_param_->mem_base = const_cast<uint8_t *>(current_mem_base); | ||||
@@ -173,7 +253,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c | |||||
task->GetOpdesc()->GetName().c_str()); | task->GetOpdesc()->GetName().c_str()); | ||||
} | } | ||||
} | } | ||||
ret = UpdateArgs(inputs, outputs); | |||||
ret = UpdateArgs(update_buffers, outputs); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return ret; | return ret; | ||||
} | } | ||||
@@ -237,33 +317,64 @@ Status DynamicSingleOp::ValidateParams(const vector<GeTensorDesc> &input_desc, | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status DynamicSingleOp::SetHostTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | |||||
const vector<GeTensorDesc> &input_desc, | |||||
const std::vector<DataBuffer> &input_buffers) { | |||||
auto op_desc = op_task_->GetOpdesc(); | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
GELOGD("Start update inputs tensor value of %s.", op_desc->GetName().c_str()); | |||||
for (const auto &input_size : inputs_size) { | |||||
size_t index = input_size.first; | |||||
auto ge_tensor_desc = input_desc.at(index); | |||||
// reconstruct GeTensor by DataBuffer | |||||
GeTensorPtr ge_tensor = MakeShared<GeTensor>(ge_tensor_desc); | |||||
GE_CHECK_NOTNULL(ge_tensor); | |||||
GELOGD("The %zu tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.", | |||||
index, ge_tensor_desc.GetDataType(), input_buffers[index].data, input_buffers[index].length); | |||||
if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(input_buffers[index].data), | |||||
static_cast<size_t>(input_buffers[index].length)) != SUCCESS) { | |||||
GELOGE(INTERNAL_ERROR, "[Set][Data]Failed to set data of ge tensor."); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
auto tensor_desc = op_desc->MutableInputDesc(index); | |||||
GE_CHECK_NOTNULL(tensor_desc); | |||||
if (!AttrUtils::SetTensor(tensor_desc, ATTR_NAME_VALUE, ge_tensor)) { | |||||
GELOGE(FAILED, "[Set][ATTR_NAME_VALUE]Failed to set ATTR_NAME_VALUE to %s.", op_desc->GetName().c_str()); | |||||
return FAILED; | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | ||||
const vector<DataBuffer> &input_buffers, | const vector<DataBuffer> &input_buffers, | ||||
vector<GeTensorDesc> &output_desc, | vector<GeTensorDesc> &output_desc, | ||||
vector<DataBuffer> &output_buffers) { | vector<DataBuffer> &output_buffers) { | ||||
GELOGD("Start DynamicSingleOp::ExecuteAsync."); | |||||
GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); | GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); | ||||
vector<pair<size_t, uint64_t>> inputs_size; | |||||
GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(input_buffers, inputs_size)); | |||||
vector<DataBuffer> update_buffers = input_buffers; | |||||
std::lock_guard<std::mutex> lk(*stream_mutex_); | |||||
if (!inputs_size.empty()) { | |||||
StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_); | |||||
GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource, stream_, inputs_size, update_buffers)); | |||||
} | |||||
if (hybrid_model_executor_ != nullptr) { | if (hybrid_model_executor_ != nullptr) { | ||||
GELOGD("Execute multi-task dynamic single op by hybrid model executor"); | GELOGD("Execute multi-task dynamic single op by hybrid model executor"); | ||||
hybrid::HybridModelExecutor::ExecuteArgs args; | hybrid::HybridModelExecutor::ExecuteArgs args; | ||||
for (auto &input : input_buffers) { | |||||
args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length)); | |||||
} | |||||
for (auto &output : output_buffers) { | |||||
args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length)); | |||||
} | |||||
for (auto &tensor_desc : input_desc) { | |||||
auto desc = MakeShared<GeTensorDesc>(tensor_desc); | |||||
GE_CHECK_NOTNULL(desc); | |||||
args.input_desc.emplace_back(desc); | |||||
} | |||||
GE_CHK_STATUS_RET_NOLOG(InitHybridModelArgs(update_buffers, output_buffers, input_desc, args)); | |||||
return hybrid_model_executor_->Execute(args); | return hybrid_model_executor_->Execute(args); | ||||
} | } | ||||
std::lock_guard<std::mutex> lk(*stream_mutex_); | |||||
GE_CHECK_NOTNULL(op_task_); | GE_CHECK_NOTNULL(op_task_); | ||||
GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | |||||
if (!inputs_size.empty()) { | |||||
GE_CHK_STATUS_RET_NOLOG(SetHostTensorValue(inputs_size, input_desc, input_buffers)); | |||||
GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, update_buffers, output_desc, output_buffers, stream_)); | |||||
} else { | |||||
GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | |||||
} | |||||
GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_)); | GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_)); | ||||
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -59,6 +59,9 @@ class SingleOp { | |||||
std::vector<OpTask *> tasks_; | std::vector<OpTask *> tasks_; | ||||
std::vector<std::vector<uintptr_t *>> arg_table_; | std::vector<std::vector<uintptr_t *>> arg_table_; | ||||
std::unique_ptr<SingleOpModelParam> running_param_; | std::unique_ptr<SingleOpModelParam> running_param_; | ||||
std::unique_ptr<hybrid::HybridModel> hybrid_model_; | |||||
std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_; | |||||
std::vector<GeTensorDesc> inputs_desc_; | |||||
}; | }; | ||||
class DynamicSingleOp { | class DynamicSingleOp { | ||||
@@ -76,7 +79,8 @@ class DynamicSingleOp { | |||||
const std::vector<DataBuffer> &inputs, | const std::vector<DataBuffer> &inputs, | ||||
std::vector<GeTensorDesc> &output_desc, | std::vector<GeTensorDesc> &output_desc, | ||||
std::vector<DataBuffer> &outputs) const; | std::vector<DataBuffer> &outputs) const; | ||||
Status SetHostTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | |||||
const vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers); | |||||
std::unique_ptr<OpTask> op_task_; | std::unique_ptr<OpTask> op_task_; | ||||
std::unique_ptr<hybrid::HybridModel> hybrid_model_; | std::unique_ptr<hybrid::HybridModel> hybrid_model_; | ||||
std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_; | std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_; | ||||
@@ -85,6 +89,7 @@ class DynamicSingleOp { | |||||
rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
size_t num_inputs_ = 0; | size_t num_inputs_ = 0; | ||||
size_t num_outputs_ = 0; | size_t num_outputs_ = 0; | ||||
ComputeGraphPtr compute_graph_; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_SINGLE_OP_SINGLE_OP_H_ | #endif // GE_SINGLE_OP_SINGLE_OP_H_ |
@@ -79,8 +79,13 @@ StreamResource *SingleOpManager::GetResource(uintptr_t resource_id, rtStream_t s | |||||
auto it = stream_resources_.find(resource_id); | auto it = stream_resources_.find(resource_id); | ||||
StreamResource *res = nullptr; | StreamResource *res = nullptr; | ||||
if (it == stream_resources_.end()) { | if (it == stream_resources_.end()) { | ||||
res = new (std::nothrow) StreamResource(resource_id); | |||||
res = new(std::nothrow) StreamResource(resource_id); | |||||
if (res != nullptr) { | if (res != nullptr) { | ||||
if (res->Init() != SUCCESS) { | |||||
GELOGE(FAILED, "[Malloc][Memory]Failed to malloc device buffer."); | |||||
delete res; | |||||
return nullptr; | |||||
} | |||||
res->SetStream(stream); | res->SetStream(stream); | ||||
stream_resources_.emplace(resource_id, res); | stream_resources_.emplace(resource_id, res); | ||||
} | } | ||||
@@ -43,6 +43,8 @@ using std::vector; | |||||
namespace ge { | namespace ge { | ||||
namespace { | namespace { | ||||
const size_t kDataOutputNum = 1; | const size_t kDataOutputNum = 1; | ||||
const uint32_t kOutputIndexOfData = 0; | |||||
constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | |||||
Status IfInferDepend(GeModelPtr &ge_model, bool &flag) { | Status IfInferDepend(GeModelPtr &ge_model, bool &flag) { | ||||
auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); | auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); | ||||
@@ -51,7 +53,9 @@ Status IfInferDepend(GeModelPtr &ge_model, bool &flag) { | |||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
const auto &depends = op_desc->GetOpInferDepends(); | const auto &depends = op_desc->GetOpInferDepends(); | ||||
if (!depends.empty()) { | |||||
bool support_dynamic_shape = false; | |||||
(void)AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, support_dynamic_shape); | |||||
if (!depends.empty() && support_dynamic_shape) { | |||||
flag = true; | flag = true; | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -437,6 +441,31 @@ Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTa | |||||
*task = aicpucc_task.release(); | *task = aicpucc_task.release(); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status SingleOpModel::InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, | |||||
SingleOp &single_op) { | |||||
for (const auto &op_desc : data_ops_) { | |||||
auto output_tensor_desc = op_desc->GetOutputDesc(kOutputIndexOfData); | |||||
GeTensorDesc tensor_desc(output_tensor_desc); | |||||
single_op.inputs_desc_.emplace_back(tensor_desc); | |||||
GELOGD("Init inputs desc from %s.", op_desc->GetName().c_str()); | |||||
} | |||||
GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized()); | |||||
auto root_model = model_helper_.GetGeRootModel(); | |||||
GE_CHECK_NOTNULL(root_model); | |||||
root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph())); | |||||
root_model->SetSubgraphInstanceNameToModel(root_model->GetRootGraph()->GetName(), ge_model); | |||||
single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model)); | |||||
GE_CHECK_NOTNULL(single_op.hybrid_model_); | |||||
GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "[Init][HybridModel]Failed."); | |||||
int32_t device_id = 0; | |||||
GE_CHK_RT_RET(rtGetDevice(&device_id)); | |||||
single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(), | |||||
device_id, | |||||
resource.GetStream())); | |||||
GE_CHECK_NOTNULL(single_op.hybrid_model_executor_); | |||||
GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor]Failed."); | |||||
return SUCCESS; | |||||
} | |||||
Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { | Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { | ||||
GE_CHK_STATUS_RET_NOLOG(ParseInputsAndOutputs()); | GE_CHK_STATUS_RET_NOLOG(ParseInputsAndOutputs()); | ||||
@@ -444,10 +473,20 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { | |||||
single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params_)); | single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params_)); | ||||
GE_CHECK_NOTNULL(single_op.running_param_); | GE_CHECK_NOTNULL(single_op.running_param_); | ||||
GE_CHK_STATUS_RET_NOLOG(SetInputsAndOutputs(single_op)); | GE_CHK_STATUS_RET_NOLOG(SetInputsAndOutputs(single_op)); | ||||
auto ge_model = model_helper_.GetGeModel(); | |||||
GE_CHECK_NOTNULL(ge_model); | |||||
bool infer_depend_flag = false; | |||||
GE_CHK_STATUS_RET(IfInferDepend(ge_model, infer_depend_flag), "[Check][InferDepend] failed."); | |||||
if (infer_depend_flag) { | |||||
// construct single_op, do single op with HybridModelExecutor | |||||
GELOGD("Init hybrid model params of single op, and will do execute with hybrid model executor."); | |||||
return InitHybridModelExecutor(resource, ge_model, single_op); | |||||
} | |||||
return BuildTaskList(&resource, single_op); | return BuildTaskList(&resource, single_op); | ||||
} | } | ||||
Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) { | |||||
Status SingleOpModel::BuildModelTaskKernel(StreamResource *stream_resource, const TaskDef &task_def, | |||||
DynamicSingleOp &single_op) { | |||||
auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | ||||
const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : | const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : | ||||
task_def.kernel_with_handle().context(); | task_def.kernel_with_handle().context(); | ||||
@@ -458,6 +497,10 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||||
TbeOpTask *tbe_task = nullptr; | TbeOpTask *tbe_task = nullptr; | ||||
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); | GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); | ||||
tbe_task->SetModelArgs(model_name_, model_id_); | tbe_task->SetModelArgs(model_name_, model_id_); | ||||
if (tbe_task->tiling_buffer_ != nullptr) { | |||||
GELOGD("tiling buffer is not nullptr."); | |||||
tbe_task->stream_resource_ = stream_resource; | |||||
} | |||||
single_op.op_task_.reset(tbe_task); | single_op.op_task_.reset(tbe_task); | ||||
} else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | ||||
GELOGD("Building AICPU_CC task"); | GELOGD("Building AICPU_CC task"); | ||||
@@ -475,10 +518,13 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||||
Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &single_op) { | |||||
auto ge_model = model_helper_.GetGeModel(); | auto ge_model = model_helper_.GetGeModel(); | ||||
GE_CHECK_NOTNULL(ge_model); | GE_CHECK_NOTNULL(ge_model); | ||||
auto compute_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); | |||||
GE_CHECK_NOTNULL(compute_graph); | |||||
single_op.compute_graph_ = compute_graph; | |||||
auto tasks = ge_model->GetModelTaskDefPtr()->task(); | auto tasks = ge_model->GetModelTaskDefPtr()->task(); | ||||
for (int i = 0; i < tasks.size(); ++i) { | for (int i = 0; i < tasks.size(); ++i) { | ||||
const TaskDef &task_def = tasks[i]; | const TaskDef &task_def = tasks[i]; | ||||
@@ -490,7 +536,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||||
GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks."); | GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks."); | ||||
return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | ||||
} | } | ||||
GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op)); | |||||
GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(stream_resource, task_def, single_op)); | |||||
} else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | ||||
if (single_op.op_task_ != nullptr) { | if (single_op.op_task_ != nullptr) { | ||||
GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks."); | GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks."); | ||||
@@ -527,6 +573,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & | |||||
single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); | single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); | ||||
GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); | GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); | ||||
model_params_.memory_size = UINT_MAX; | model_params_.memory_size = UINT_MAX; | ||||
model_params_.graph_is_dynamic = true; | |||||
auto ge_model = model_helper_.GetGeModel(); | auto ge_model = model_helper_.GetGeModel(); | ||||
GE_CHECK_NOTNULL(ge_model); | GE_CHECK_NOTNULL(ge_model); | ||||
@@ -551,6 +598,6 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & | |||||
GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "Failed to init hybrid model"); | GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "Failed to init hybrid model"); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
return BuildTaskListForDynamicOp(single_op); | |||||
return BuildTaskListForDynamicOp(&resource, single_op); | |||||
} | } | ||||
} // namespace ge | } // namespace ge |
@@ -40,6 +40,7 @@ struct SingleOpModelParam { | |||||
std::map<uintptr_t, int> addr_mapping_; | std::map<uintptr_t, int> addr_mapping_; | ||||
int64_t core_type = 0; | int64_t core_type = 0; | ||||
bool graph_is_dynamic = false; | |||||
}; | }; | ||||
class SingleOpModel { | class SingleOpModel { | ||||
@@ -65,15 +66,17 @@ class SingleOpModel { | |||||
void ParseOutputNode(const OpDescPtr &op_desc); | void ParseOutputNode(const OpDescPtr &op_desc); | ||||
Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); | Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); | ||||
Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); | |||||
Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op); | |||||
Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); | Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); | ||||
Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | ||||
bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); | bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); | ||||
Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); | Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); | ||||
Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op); | |||||
Status BuildModelTaskKernel(StreamResource *stream_resource, const domi::TaskDef &task_def, | |||||
DynamicSingleOp &single_op); | |||||
static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); | static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); | ||||
void ParseArgTable(OpTask *task, SingleOp &op); | void ParseArgTable(OpTask *task, SingleOp &op); | ||||
Status InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, SingleOp &single_op); | |||||
std::string model_name_; | std::string model_name_; | ||||
uint32_t model_id_ = 0; | uint32_t model_id_ = 0; | ||||
@@ -22,6 +22,11 @@ | |||||
#include "single_op/single_op_model.h" | #include "single_op/single_op_model.h" | ||||
namespace ge { | namespace ge { | ||||
namespace { | |||||
// limit available device mem size 1M | |||||
const uint32_t kFuzzDeviceBufferSize = 1 * 1024 * 1024; | |||||
} | |||||
StreamResource::StreamResource(uintptr_t resource_id) : resource_id_(resource_id) { | StreamResource::StreamResource(uintptr_t resource_id) : resource_id_(resource_id) { | ||||
} | } | ||||
@@ -39,6 +44,17 @@ StreamResource::~StreamResource() { | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtFree failed")); | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtFree failed")); | ||||
} | } | ||||
} | } | ||||
if (device_buffer_ != nullptr) { | |||||
auto rt_ret = rtFree(device_buffer_); | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed.")); | |||||
} | |||||
} | |||||
Status StreamResource::Init() { | |||||
auto rt_ret = rtMalloc(&device_buffer_, kFuzzDeviceBufferSize, RT_MEMORY_HBM); | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Malloc][Rt] failed.")); | |||||
return SUCCESS; | |||||
} | } | ||||
SingleOp *StreamResource::GetOperator(const uint64_t key) { | SingleOp *StreamResource::GetOperator(const uint64_t key) { | ||||
@@ -40,6 +40,7 @@ class StreamResource { | |||||
rtStream_t GetStream() const; | rtStream_t GetStream() const; | ||||
void SetStream(rtStream_t stream); | void SetStream(rtStream_t stream); | ||||
Status Init(); | |||||
SingleOp *GetOperator(const uint64_t key); | SingleOp *GetOperator(const uint64_t key); | ||||
DynamicSingleOp *GetDynamicOperator(const uint64_t key); | DynamicSingleOp *GetDynamicOperator(const uint64_t key); | ||||
@@ -49,6 +50,9 @@ class StreamResource { | |||||
uint8_t *MallocMemory(const std::string &purpose, size_t size, bool holding_lock = true); | uint8_t *MallocMemory(const std::string &purpose, size_t size, bool holding_lock = true); | ||||
uint8_t *MallocWeight(const std::string &purpose, size_t size); | uint8_t *MallocWeight(const std::string &purpose, size_t size); | ||||
const uint8_t *GetMemoryBase() const; | const uint8_t *GetMemoryBase() const; | ||||
void *GetDeviceBufferAddr() const { | |||||
return device_buffer_; | |||||
} | |||||
private: | private: | ||||
uint8_t *DoMallocMemory(const std::string &purpose, | uint8_t *DoMallocMemory(const std::string &purpose, | ||||
@@ -65,6 +69,7 @@ class StreamResource { | |||||
rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
std::mutex mu_; | std::mutex mu_; | ||||
std::mutex stream_mu_; | std::mutex stream_mu_; | ||||
void *device_buffer_ = nullptr; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
@@ -134,7 +134,7 @@ Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) { | |||||
Status OpTask::UpdateRunInfo() { | |||||
return UNSUPPORTED; | return UNSUPPORTED; | ||||
} | } | ||||
@@ -196,14 +196,14 @@ void TbeOpTask::SetHandle(void *handle) { | |||||
Status TbeOpTask::LaunchKernel(rtStream_t stream) { | Status TbeOpTask::LaunchKernel(rtStream_t stream) { | ||||
GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); | GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); | ||||
auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_); | |||||
auto ret = rtKernelLaunch(stub_func_, block_dim_, args_.get(), static_cast<uint32_t>(arg_size_), sm_desc, stream); | |||||
auto ret = DoLaunchKernel(stream); | |||||
int retry_times = 0; | int retry_times = 0; | ||||
while (ret != RT_ERROR_NONE && retry_times < kLaunchRetryTimes) { | while (ret != RT_ERROR_NONE && retry_times < kLaunchRetryTimes) { | ||||
retry_times++; | retry_times++; | ||||
GELOGW("Retry after %d ms, retry_times: %d", kSleepTime, retry_times); | GELOGW("Retry after %d ms, retry_times: %d", kSleepTime, retry_times); | ||||
std::this_thread::sleep_for(std::chrono::milliseconds(kSleepTime)); | std::this_thread::sleep_for(std::chrono::milliseconds(kSleepTime)); | ||||
ret = rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, sm_desc, stream); | |||||
ret = DoLaunchKernel(stream); | |||||
} | } | ||||
if (ret != RT_ERROR_NONE) { | if (ret != RT_ERROR_NONE) { | ||||
@@ -215,8 +215,7 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) { | |||||
GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc)); | |||||
Status TbeOpTask::UpdateRunInfo() { | |||||
// invoke OpParaCalculate | // invoke OpParaCalculate | ||||
GELOGD("Start to invoke OpParaCalculate."); | GELOGD("Start to invoke OpParaCalculate."); | ||||
optiling::OpRunInfo run_info; | optiling::OpRunInfo run_info; | ||||
@@ -229,10 +228,9 @@ Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const ve | |||||
block_dim_ = run_info.block_dim; | block_dim_ = run_info.block_dim; | ||||
tiling_data_ = run_info.tiling_data.str(); | tiling_data_ = run_info.tiling_data.str(); | ||||
tiling_key_ = run_info.tiling_key; | tiling_key_ = run_info.tiling_key; | ||||
run_info_workspaces_ = run_info.workspaces; | |||||
GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_, | GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_, | ||||
tiling_data_.size(), tiling_key_); | tiling_data_.size(), tiling_key_); | ||||
GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "Failed to allocate workspaces"); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -282,14 +280,33 @@ Status TbeOpTask::UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, cons | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
void TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size) { | |||||
Status TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size) { | |||||
if (tiling_buffer != nullptr) { | |||||
uintptr_t *arg_base = nullptr; | |||||
size_t arg_num = 0; | |||||
GetIoAddr(arg_base, arg_num); | |||||
GE_CHECK_NOTNULL(node); | |||||
GE_CHECK_NOTNULL(node->GetOpDesc()); | |||||
uint32_t inputs_num = node->GetOpDesc()->GetInputsSize(); | |||||
uint32_t outputs_num = node->GetOpDesc()->GetOutputsSize(); | |||||
uint32_t workspace_nums = node->GetOpDesc()->GetWorkspace().size(); | |||||
uint32_t tiling_index = inputs_num + outputs_num + workspace_nums; | |||||
if (arg_num == 0 || arg_num < tiling_index) { | |||||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Size]Tiling index %u, arg number %zu is invalid.", | |||||
tiling_index, arg_num); | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | |||||
arg_base[tiling_index] = reinterpret_cast<uintptr_t>(tiling_buffer); | |||||
} | |||||
node_ = node; | node_ = node; | ||||
tiling_buffer_ = tiling_buffer; | tiling_buffer_ = tiling_buffer; | ||||
max_tiling_size_ = max_tiling_size; | max_tiling_size_ = max_tiling_size; | ||||
return SUCCESS; | |||||
} | } | ||||
Status TbeOpTask::AllocateWorkspaces(const vector<int64_t> &workspace_sizes) { | Status TbeOpTask::AllocateWorkspaces(const vector<int64_t> &workspace_sizes) { | ||||
static const std::string kPurpose("malloc workspace memory for dynamic op."); | static const std::string kPurpose("malloc workspace memory for dynamic op."); | ||||
workspaces_.clear(); | |||||
if (workspace_sizes.empty()) { | if (workspace_sizes.empty()) { | ||||
GELOGD("No need to allocate workspace."); | GELOGD("No need to allocate workspace."); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -326,8 +343,10 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
vector<GeTensorDesc> &output_desc, | vector<GeTensorDesc> &output_desc, | ||||
vector<DataBuffer> &output_buffers, | vector<DataBuffer> &output_buffers, | ||||
rtStream_t stream) { | rtStream_t stream) { | ||||
GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo(input_desc, output_desc)); | |||||
GELOGD("[%s] Start to launch kernel", node_->GetName().c_str()); | GELOGD("[%s] Start to launch kernel", node_->GetName().c_str()); | ||||
GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc)); | |||||
GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo()); | |||||
GE_CHK_STATUS_RET(AllocateWorkspaces(run_info_workspaces_), "[Allocate][Workspaces] failed."); | |||||
std::vector<void *> args; | std::vector<void *> args; | ||||
for (auto &buffer : input_buffers) { | for (auto &buffer : input_buffers) { | ||||
args.emplace_back(buffer.data); | args.emplace_back(buffer.data); | ||||
@@ -347,6 +366,15 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
args.emplace_back(tiling_buffer_); | args.emplace_back(tiling_buffer_); | ||||
} | } | ||||
GELOGD("Dst size is %zu, src size is %zu.", arg_size_, args.size() * sizeof(void *)); | |||||
// node with workspace: build can not get size of workspace, need to update arg_size_ when execute | |||||
if (arg_size_ < (args.size() * sizeof(void *))) { | |||||
size_t temp_size = args.size() * sizeof(void *); | |||||
GELOGD("Need to reset size of args_ from %zu to %zu.", arg_size_, temp_size); | |||||
args_.reset(new(std::nothrow) uint8_t[temp_size]()); | |||||
GE_CHECK_NOTNULL(args_); | |||||
arg_size_ = temp_size; | |||||
} | |||||
if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) { | if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) { | ||||
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[%s] Failed to update kernel args.", | GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[%s] Failed to update kernel args.", | ||||
node_->GetName().c_str()); | node_->GetName().c_str()); | ||||
@@ -354,17 +382,22 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
} | } | ||||
GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | ||||
GE_CHK_STATUS_RET(DoLaunchKernel(stream), "Failed to do launch kernel."); | |||||
return SUCCESS; | |||||
} | |||||
Status TbeOpTask::DoLaunchKernel(rtStream_t stream) { | |||||
auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_); | |||||
if (handle_ == nullptr) { | if (handle_ == nullptr) { | ||||
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream)); | |||||
GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str()); | |||||
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), static_cast<uint32_t>(arg_size_), | |||||
sm_desc, stream)); | |||||
} else { | } else { | ||||
std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_); | std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_); | ||||
std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_); | std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_); | ||||
GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr, | |||||
stream, kernel_info.c_str())); | |||||
GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str()); | |||||
GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), | |||||
static_cast<uint32_t>(arg_size_), sm_desc, stream, kernel_info.c_str())); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -30,6 +30,7 @@ | |||||
#include "cce/aicpu_engine_struct.h" | #include "cce/aicpu_engine_struct.h" | ||||
#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | #include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | ||||
#include "init/gelib.h" | #include "init/gelib.h" | ||||
#include "register/op_tiling.h" | |||||
namespace ge { | namespace ge { | ||||
class StreamResource; | class StreamResource; | ||||
@@ -39,8 +40,7 @@ class OpTask { | |||||
OpTask() = default; | OpTask() = default; | ||||
virtual ~OpTask() = default; | virtual ~OpTask() = default; | ||||
virtual Status LaunchKernel(rtStream_t stream) = 0; | virtual Status LaunchKernel(rtStream_t stream) = 0; | ||||
virtual Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc, | |||||
const vector<GeTensorDesc> &output_desc); | |||||
virtual Status UpdateRunInfo(); | |||||
virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | virtual Status UpdateArgTable(const SingleOpModelParam ¶m); | ||||
void SetModelArgs(std::string model_name, uint32_t model_id); | void SetModelArgs(std::string model_name, uint32_t model_id); | ||||
Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); | ||||
@@ -81,22 +81,23 @@ class TbeOpTask : public OpTask { | |||||
void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | ||||
const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); | const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); | ||||
Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc, | |||||
const vector<GeTensorDesc> &output_desc) override; | |||||
Status UpdateRunInfo() override; | |||||
const void *GetArgs() const; | const void *GetArgs() const; | ||||
size_t GetArgSize() const; | size_t GetArgSize() const; | ||||
const std::string &GetStubName() const; | const std::string &GetStubName() const; | ||||
void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | |||||
Status EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size); | |||||
const std::string &GetTaskType() const override; | const std::string &GetTaskType() const override; | ||||
void SetHandle(void *handle); | void SetHandle(void *handle); | ||||
private: | private: | ||||
friend class SingleOpModel; | friend class SingleOpModel; | ||||
friend class TbeTaskBuilder; | |||||
static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor); | static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor); | ||||
Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, | Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, | ||||
const vector<GeTensorDesc> &output_desc); | const vector<GeTensorDesc> &output_desc); | ||||
Status AllocateWorkspaces(const std::vector<int64_t> &workspace_sizes); | Status AllocateWorkspaces(const std::vector<int64_t> &workspace_sizes); | ||||
Status DoLaunchKernel(rtStream_t stream); | |||||
const void *stub_func_ = nullptr; | const void *stub_func_ = nullptr; | ||||
std::unique_ptr<uint8_t[]> args_; | std::unique_ptr<uint8_t[]> args_; | ||||
@@ -108,6 +109,7 @@ class TbeOpTask : public OpTask { | |||||
void *tiling_buffer_ = nullptr; | void *tiling_buffer_ = nullptr; | ||||
uint32_t max_tiling_size_ = 0; | uint32_t max_tiling_size_ = 0; | ||||
std::string tiling_data_; | std::string tiling_data_; | ||||
std::vector<int64_t> run_info_workspaces_; | |||||
std::vector<void *> workspaces_; | std::vector<void *> workspaces_; | ||||
NodePtr node_; | NodePtr node_; | ||||
@@ -290,86 +290,65 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m | |||||
} | } | ||||
Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc) { | Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc) { | ||||
size_t arg_size = kernel_def_.args_size(); | |||||
auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | |||||
GE_CHECK_NOTNULL(args); | |||||
auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret)); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
auto task_type = static_cast<rtModelTaskType_t>(task_def_.type()); | |||||
bool is_task_all_kernel = (task_type == RT_MODEL_TASK_ALL_KERNEL); | |||||
size_t arg_size = 0; | |||||
std::unique_ptr<uint8_t[]> args = nullptr; | |||||
if (is_task_all_kernel) { | |||||
GELOGD("SetKernelArgs of %s in branch of RT_MODEL_TASK_ALL_KERNEL.", op_desc->GetName().c_str()); | |||||
arg_size = kernel_def_with_handle_.args_size(); | |||||
args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | |||||
GE_CHECK_NOTNULL(args); | |||||
GE_CHK_RT_RET(rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, | |||||
RT_MEMCPY_HOST_TO_HOST)) | |||||
} else { | |||||
GELOGD("SetKernelArgs of %s in branch of RT_MODEL_TASK_KERNEL.", op_desc->GetName().c_str()); | |||||
arg_size = kernel_def_.args_size(); | |||||
args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | |||||
GE_CHECK_NOTNULL(args); | |||||
GE_CHK_RT_RET(rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST)) | |||||
} | } | ||||
const domi::KernelContext &context = kernel_def_.context(); | |||||
const domi::KernelContext &context = task_type == RT_MODEL_TASK_ALL_KERNEL ? | |||||
kernel_def_with_handle_.context() : kernel_def_.context(); | |||||
const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | ||||
uint16_t offset = *args_offset_tmp; | uint16_t offset = *args_offset_tmp; | ||||
bool is_dynamic = false; | |||||
(void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic); | |||||
if (is_dynamic) { | |||||
GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task)); | |||||
} else { | |||||
// copy args | |||||
std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | |||||
void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | |||||
uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | |||||
rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret)); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | |||||
} | |||||
task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc); | |||||
// copy args | |||||
std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | |||||
void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | |||||
uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | |||||
GE_CHK_RT_RET(rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST)); | |||||
return SUCCESS; | |||||
} | |||||
Status TbeTaskBuilder::SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, | |||||
const OpDescPtr &op_desc) { | |||||
size_t arg_size = kernel_def_with_handle_.args_size(); | |||||
auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | |||||
GE_CHECK_NOTNULL(args); | |||||
auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret)); | |||||
return rt_ret; | |||||
if (is_task_all_kernel) { | |||||
task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc, | |||||
kernel_def_with_handle_); | |||||
} else { | |||||
task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc); | |||||
} | } | ||||
const domi::KernelContext &context = kernel_def_with_handle_.context(); | |||||
const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | |||||
uint16_t offset = *args_offset_tmp; | |||||
bool is_dynamic = false; | bool is_dynamic = false; | ||||
(void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic); | (void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic); | ||||
if (is_dynamic) { | if (is_dynamic) { | ||||
GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task)); | GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task)); | ||||
} else { | |||||
// copy args | |||||
std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | |||||
void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | |||||
uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | |||||
rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret)); | |||||
return rt_ret; | |||||
if (!param.graph_is_dynamic && task.tiling_buffer_ != nullptr) { | |||||
GELOGD("Need to update run info when graph is static with dynamic node: %s.", op_desc->GetName().c_str()); | |||||
task.UpdateRunInfo(); | |||||
GE_CHK_RT_RET(rtMemcpy(task.tiling_buffer_, task.max_tiling_size_, task.tiling_data_.data(), | |||||
task.tiling_data_.size(), RT_MEMCPY_HOST_TO_DEVICE)); | |||||
} | } | ||||
} | } | ||||
task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc, | |||||
kernel_def_with_handle_); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m) { | Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m) { | ||||
GELOGD("Build tbe task begin"); | GELOGD("Build tbe task begin"); | ||||
auto task_type = static_cast<rtModelTaskType_t>(task_def_.type()); | |||||
auto ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? SetKernelWithHandleArgs(task, param, op_desc_) : | |||||
SetKernelArgs(task, param, op_desc_); | |||||
auto ret = SetKernelArgs(task, param, op_desc_); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return ret; | return ret; | ||||
} | } | ||||
auto task_type = static_cast<rtModelTaskType_t>(task_def_.type()); | |||||
ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? RegisterKernelWithHandle(task, param) : | ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? RegisterKernelWithHandle(task, param) : | ||||
RegisterKernel(task, param); | RegisterKernel(task, param); | ||||
task.SetHandle(handle_); | task.SetHandle(handle_); | ||||
@@ -409,7 +388,7 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { | |||||
GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); | GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); | ||||
} | } | ||||
task.EnableDynamicSupport(node_, tiling_buffer, static_cast<size_t>(max_size)); | |||||
task.EnableDynamicSupport(node_, tiling_buffer, static_cast<uint32_t>(max_size)); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
} // namespace ge | } // namespace ge |
@@ -97,7 +97,6 @@ class TbeTaskBuilder { | |||||
private: | private: | ||||
Status InitTilingInfo(TbeOpTask &task); | Status InitTilingInfo(TbeOpTask &task); | ||||
Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | ||||
Status SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | |||||
Status GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const; | Status GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const; | ||||
Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam ¶m); | Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam ¶m); | ||||
@@ -110,6 +110,7 @@ const char *const SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel"; | |||||
const char *const ORIGINAL_MODEL_FILE = "ge.originalModelFile"; | const char *const ORIGINAL_MODEL_FILE = "ge.originalModelFile"; | ||||
const char *const INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; | const char *const INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; | ||||
const char *const OP_DEBUG_LEVEL = "ge.opDebugLevel"; | const char *const OP_DEBUG_LEVEL = "ge.opDebugLevel"; | ||||
const char *const PERFORMANCE_MODE = "ge.performance_mode"; | |||||
} // namespace configure_option | } // namespace configure_option | ||||
// Configure stream num by Session constructor options param, | // Configure stream num by Session constructor options param, | ||||
// its value should be int32_t type, default value is "1" | // its value should be int32_t type, default value is "1" | ||||
@@ -314,6 +315,11 @@ const std::string HCOM_MULTI_MODE = "ge.hcomMultiMode"; | |||||
// atc and ir option | // atc and ir option | ||||
const char *const INPUT_SHAPE_RANGE = "input_shape_range"; | const char *const INPUT_SHAPE_RANGE = "input_shape_range"; | ||||
// Configure express high compile performance or high execute performance | |||||
// normal: no need to compile, used saved .o files directly | |||||
// high: need to recompile, high execute performance mode | |||||
const std::string PERFORMANCE_MODE = "ge.performance_mode"; | |||||
// Graph run mode | // Graph run mode | ||||
enum GraphRunMode { PREDICTION = 0, TRAIN }; | enum GraphRunMode { PREDICTION = 0, TRAIN }; | ||||
@@ -388,6 +394,7 @@ static const char *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str(); | |||||
static const char *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str(); | static const char *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str(); | ||||
static const char *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str(); | static const char *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str(); | ||||
static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); | static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); | ||||
static const char *const PERFORMANCE_MODE = ge::PERFORMANCE_MODE.c_str(); | |||||
// for interface: aclgrphBuildModel | // for interface: aclgrphBuildModel | ||||
#ifdef __GNUC__ | #ifdef __GNUC__ | ||||
@@ -412,7 +419,8 @@ const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT, | |||||
OP_COMPILER_CACHE_MODE, | OP_COMPILER_CACHE_MODE, | ||||
MDL_BANK_PATH, | MDL_BANK_PATH, | ||||
OP_BANK_PATH, | OP_BANK_PATH, | ||||
OP_BANK_UPDATE}; | |||||
OP_BANK_UPDATE, | |||||
PERFORMANCE_MODE}; | |||||
// for interface: aclgrphParse | // for interface: aclgrphParse | ||||
const std::set<std::string> ir_parser_suppported_options = { | const std::set<std::string> ir_parser_suppported_options = { | ||||
@@ -67,8 +67,9 @@ struct DataBuffer { | |||||
void *data; // Data address | void *data; // Data address | ||||
uint64_t length; // Data length | uint64_t length; // Data length | ||||
bool isDataSupportMemShare = false; | bool isDataSupportMemShare = false; | ||||
DataBuffer(void *dataIn, uint64_t len, bool isSupportMemShare) | |||||
: data(dataIn), length(len), isDataSupportMemShare(isSupportMemShare) {} | |||||
uint32_t placement = 0; | |||||
DataBuffer(void *dataIn, uint64_t len, bool isSupportMemShare, uint32_t placement = 0) | |||||
: data(dataIn), length(len), isDataSupportMemShare(isSupportMemShare), placement(placement) {} | |||||
DataBuffer() : data(nullptr), length(0), isDataSupportMemShare(false) {} | DataBuffer() : data(nullptr), length(0), isDataSupportMemShare(false) {} | ||||
}; | }; | ||||
@@ -65,10 +65,12 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||||
/// @param [in] inputs: input tensors. | /// @param [in] inputs: input tensors. | ||||
/// @param [in] outputs: output tensors. | /// @param [in] outputs: output tensors. | ||||
/// @param [in] model_file_name: name of model file. | /// @param [in] model_file_name: name of model file. | ||||
/// @param [in] compile_flag: op build flag, accurate build is 0, fuzz build is 1 | |||||
/// @return SUCCESS or FAILED | /// @return SUCCESS or FAILED | ||||
/// | /// | ||||
Status BuildSingleOpModel(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, | Status BuildSingleOpModel(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, | ||||
const std::vector<GeTensor> &outputs, const std::string &model_file_name); | |||||
const std::vector<GeTensor> &outputs, const std::string &model_file_name, | |||||
int32_t compile_flag = 0); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief: Build single Op into model buff. | /// @brief: Build single Op into model buff. | ||||
@@ -76,10 +78,14 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||||
/// @param [in] inputs: input tensors. | /// @param [in] inputs: input tensors. | ||||
/// @param [in] outputs: output tensors. | /// @param [in] outputs: output tensors. | ||||
/// @param [in] engine_type: engine type. | /// @param [in] engine_type: engine type. | ||||
/// @param [in] compile_flag: op build flag, accurate build is 0, fuzz build is 1 | |||||
/// @param [out] model_buff: model buff of op. | /// @param [out] model_buff: model buff of op. | ||||
/// @return SUCCESS or FAILED | /// @return SUCCESS or FAILED | ||||
// old process will be delete | |||||
Status BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | Status BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | ||||
OpEngineType engine_type, ModelBufferData &model_buff); | OpEngineType engine_type, ModelBufferData &model_buff); | ||||
Status BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | |||||
OpEngineType engine_type, int32_t compile_flag, ModelBufferData &model_buff); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief: Build single Op into model buff. | /// @brief: Build single Op into model buff. | ||||
@@ -97,7 +103,7 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||||
ge::ModelBufferData &model, bool is_offline = true); | ge::ModelBufferData &model, bool is_offline = true); | ||||
Status BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | Status BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | ||||
const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | ||||
bool is_offline = true); | |||||
bool is_offline = true, int32_t compile_flag = 0); | |||||
void RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs); | void RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs); | ||||
Status CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs); | Status CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs); | ||||
@@ -123,6 +123,7 @@ struct OmgContext { | |||||
bool need_multi_batch = false; | bool need_multi_batch = false; | ||||
std::vector<NodePtr> data_nodes; | std::vector<NodePtr> data_nodes; | ||||
std::vector<NodePtr> getnext_nosink_nodes; | std::vector<NodePtr> getnext_nosink_nodes; | ||||
bool fuzz_compile_flag = false; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
@@ -274,6 +274,7 @@ set(COMMON_SRC_FILES | |||||
"${GE_CODE_DIR}/ge/graph/passes/set_input_output_offset_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/set_input_output_offset_pass.cc" | ||||
"${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc" | ||||
"${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" | "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" | ||||
"${GE_CODE_DIR}/ge/graph/passes/mark_node_unknown_shape_pass.cc" | |||||
"${GE_CODE_DIR}/ge/model/ge_model.cc" | "${GE_CODE_DIR}/ge/model/ge_model.cc" | ||||
"${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" | "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" | ||||
"${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" | ||||
@@ -699,6 +700,8 @@ set(PASS_TEST_FILES | |||||
"graph/passes/multi_batch_clone_pass_unittest.cc" | "graph/passes/multi_batch_clone_pass_unittest.cc" | ||||
"graph/passes/replace_with_empty_const_pass_unittest.cc" | "graph/passes/replace_with_empty_const_pass_unittest.cc" | ||||
"graph/passes/transpose_transdata_pass_unittest.cc" | "graph/passes/transpose_transdata_pass_unittest.cc" | ||||
"graph/passes/mark_node_unknown_shape_pass_unittest.cc" | |||||
"graph/passes/reshape_recovery_pass_unittest.cc" | |||||
) | ) | ||||
set(KERNEL_TEST_FILES | set(KERNEL_TEST_FILES | ||||
@@ -788,6 +791,7 @@ set(SINGLE_OP_TEST_FILES | |||||
"single_op/single_op_manager_unittest.cc" | "single_op/single_op_manager_unittest.cc" | ||||
"single_op/stream_resource_unittest.cc" | "single_op/stream_resource_unittest.cc" | ||||
"single_op/single_op_task_unittest.cc" | "single_op/single_op_task_unittest.cc" | ||||
"single_op/single_op_unittest.cc" | |||||
) | ) | ||||
set(PROFILING_MNG_TEST_FILES | set(PROFILING_MNG_TEST_FILES | ||||
@@ -85,7 +85,7 @@ TEST_F(UtestGeGenerator, test_build_single_op_online) { | |||||
GeGenerator generator; | GeGenerator generator; | ||||
generator.Initialize({}); | generator.Initialize({}); | ||||
ModelBufferData model_buffer; | ModelBufferData model_buffer; | ||||
EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, model_buffer), FAILED); | |||||
EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, false, model_buffer), FAILED); | |||||
} | } | ||||
TEST_F(UtestGeGenerator, test_graph_manager) { | TEST_F(UtestGeGenerator, test_graph_manager) { | ||||
@@ -0,0 +1,115 @@ | |||||
/** | |||||
* Copyright 2021 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include <gtest/gtest.h> | |||||
#include <cstdint> | |||||
#include <memory> | |||||
#include <string> | |||||
#define private public | |||||
#include "graph/passes/mark_node_unknown_shape_pass.h" | |||||
#include "common/ge_inner_error_codes.h" | |||||
#include "inc/pass_manager.h" | |||||
#include "graph/common/local_context.h" | |||||
#undef private | |||||
namespace ge { | |||||
class UtestMarkNodeUnknownShapePass : public testing::Test { | |||||
protected: | |||||
void SetUp() {} | |||||
void TearDown() {} | |||||
public: | |||||
NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { | |||||
GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||||
auto op_desc = std::make_shared<OpDesc>(name, type); | |||||
for (auto i = 0; i < in_num; ++i) { | |||||
op_desc->AddInputDesc(test_desc); | |||||
} | |||||
for (auto i = 0; i < out_num; ++i) { | |||||
op_desc->AddOutputDesc(test_desc); | |||||
} | |||||
return graph->AddNode(op_desc); | |||||
} | |||||
/// netoutput1 | |||||
/// | | |||||
/// conv1 | |||||
/// \ / | |||||
/// data | |||||
void make_graph(const ComputeGraphPtr &graph) { | |||||
GetLocalOmgContext().fuzz_compile_flag = true; | |||||
auto conv2d_node = MakeNode(graph, 2, 1, "conv1", "Conv2D"); | |||||
{ | |||||
auto data1 = MakeNode(graph, 1, 1, "data", "Data"); | |||||
GeTensorDesc tensor_desc(GeShape({1,3,224,224}), FORMAT_NCHW, DT_FLOAT); | |||||
data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); | |||||
data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); | |||||
GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); | |||||
GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); | |||||
} | |||||
conv2d_node->GetOpDesc()->SetOpKernelLibName("AIcoreEngine"); | |||||
AttrUtils::SetBool(conv2d_node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS, true); | |||||
auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput"); | |||||
GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); | |||||
} | |||||
}; | |||||
TEST_F(UtestMarkNodeUnknownShapePass, test_run_with_GE_kernel) { | |||||
OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||||
ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||||
op_desc->SetOpKernelLibName("GE"); | |||||
graph->AddNode(op_desc); | |||||
PassManager pass; | |||||
pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); | |||||
EXPECT_EQ(pass.Run(graph), SUCCESS); | |||||
} | |||||
TEST_F(UtestMarkNodeUnknownShapePass, test_run_without_fuzz_attrs) { | |||||
OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||||
ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||||
op_desc->SetOpKernelLibName("AIcoreEngine"); | |||||
graph->AddNode(op_desc); | |||||
GetLocalOmgContext().fuzz_compile_flag = true; | |||||
PassManager pass; | |||||
pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); | |||||
EXPECT_EQ(pass.Run(graph), SUCCESS); | |||||
} | |||||
TEST_F(UtestMarkNodeUnknownShapePass, test_run_with_fuzz_attrs) { | |||||
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph"); | |||||
make_graph(graph); | |||||
PassManager pass; | |||||
pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); | |||||
EXPECT_EQ(pass.Run(graph), SUCCESS); | |||||
EXPECT_EQ(graph->GetAllNodes().size(), 3); | |||||
for (const auto &node : graph->GetAllNodes()) { | |||||
if (node->GetName() == "conv1") { | |||||
auto op_desc = node->GetOpDesc(); | |||||
EXPECT_NE(op_desc, nullptr); | |||||
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||||
auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i)); | |||||
EXPECT_TRUE(input_desc->GetShape().GetDim(0) == -2); | |||||
} | |||||
for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) { | |||||
EXPECT_NE(output_desc, nullptr); | |||||
EXPECT_TRUE(output_desc->GetShape().GetDim(0) == -2); | |||||
} | |||||
} | |||||
} | |||||
} | |||||
} // namespace ge |
@@ -0,0 +1,69 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include "graph/passes/reshape_recovery_pass.h" | |||||
#include <gtest/gtest.h> | |||||
#include <set> | |||||
#include <string> | |||||
#include "graph_builder_utils.h" | |||||
namespace ge { | |||||
class UtestReshapeRecoveryPass : public testing::Test { | |||||
protected: | |||||
void SetUp() {} | |||||
void TearDown() {} | |||||
}; | |||||
namespace { | |||||
/// netoutput1 | |||||
/// | \ | |||||
///transdata1 \ | |||||
/// | \ | |||||
/// | transdata2 | |||||
/// | / | |||||
/// var1 const1 | |||||
ut::GraphBuilder Graph1Builder() { | |||||
ut::GraphBuilder builder = ut::GraphBuilder("g2"); | |||||
auto var1 = builder.AddNode("var1", "Variable", 0, 1, FORMAT_ND, DT_FLOAT, {-1}); | |||||
auto const1 = builder.AddNode("const1", "Const", 0, 1, FORMAT_ND, DT_FLOAT, {1, 1, 224, 224}); | |||||
auto transdata2 = builder.AddNode("transdata2", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {224, 224}); | |||||
auto transdata1 = builder.AddNode("transdata1", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {224, 224}); | |||||
auto netoutput1 = builder.AddNode("netoutput1", "Netoutput", 2, 0); | |||||
builder.AddDataEdge(var1, 0, transdata1, 0); | |||||
builder.AddDataEdge(const1, 0, transdata2, 0); | |||||
builder.AddDataEdge(transdata2, 0, netoutput1, 1); | |||||
builder.AddDataEdge(transdata1, 0, netoutput1, 0); | |||||
return builder; | |||||
} | |||||
} // namespace | |||||
TEST_F(UtestReshapeRecoveryPass, reshape_recovery_with_dynamic_shape) { | |||||
auto builder = Graph1Builder(); | |||||
auto graph = builder.GetGraph(); | |||||
ReshapeRecoveryPass reshape_recovery_pass; | |||||
EXPECT_EQ(graph->GetDirectNodesSize(),5); | |||||
Status ret = reshape_recovery_pass.Run(graph); | |||||
EXPECT_EQ(ret, SUCCESS); | |||||
EXPECT_EQ(graph->GetDirectNodesSize(),8); | |||||
auto reshape1 = graph->FindNode("Reshape_ReshapeRecoveryPass_0"); | |||||
EXPECT_NE(reshape1, nullptr); | |||||
} | |||||
} // namespace ge |
@@ -332,3 +332,37 @@ EXPECT_EQ(sub_sub_graph1->GetParentGraph()->GetName(), "MergedGraph"); | |||||
// node "cond_data" & "body_data" has owner compute graph "MergedGraph" before unfold | // node "cond_data" & "body_data" has owner compute graph "MergedGraph" before unfold | ||||
EXPECT_EQ(sub_graph_while_node->GetOwnerComputeGraph()->GetName(), "MergedGraph"); | EXPECT_EQ(sub_graph_while_node->GetOwnerComputeGraph()->GetName(), "MergedGraph"); | ||||
} | } | ||||
TEST_F(UtestGeHybrid, hybrid_model_executor_update_args) { | |||||
auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask()); | |||||
auto graph = make_shared<ComputeGraph>("graph"); | |||||
OpDescPtr op_desc = CreateOpDesc("Add", "Add"); | |||||
GeShape shape({2, 16}); | |||||
GeTensorDesc tensor_desc(shape); | |||||
op_desc->AddInputDesc(tensor_desc); | |||||
op_desc->AddInputDesc(tensor_desc); | |||||
op_desc->AddOutputDesc(tensor_desc); | |||||
auto node = graph->AddNode(op_desc); | |||||
std::unique_ptr<NodeItem> node_item; | |||||
NodeItem::Create(node, node_item); | |||||
node_item->input_start = 0; | |||||
node_item->output_start = 0; | |||||
GraphExecutionContext execution_context; | |||||
SubgraphContext subgraph_context(nullptr, &execution_context); | |||||
subgraph_context.all_inputs_.resize(2); | |||||
subgraph_context.all_outputs_.resize(1); | |||||
NodeState node_state(*node_item, &subgraph_context); | |||||
auto task_context = TaskContext::Create(&node_state, &execution_context, &subgraph_context); | |||||
int32_t buffer[1]; | |||||
aicore_task->tiling_buffer_ = TensorBuffer::Create(buffer, sizeof(buffer)); | |||||
EXPECT_NE(aicore_task->tiling_buffer_, nullptr); | |||||
aicore_task->max_arg_count_ = 0; | |||||
EXPECT_EQ(aicore_task->UpdateArgs(*task_context), ACL_ERROR_GE_MEMORY_OPERATE_FAILED); | |||||
aicore_task->args_ = std::unique_ptr<uint8_t[]>(new uint8_t[sizeof(uintptr_t) * 2]); | |||||
EXPECT_EQ(aicore_task->UpdateArgs(*task_context), SUCCESS); | |||||
} |
@@ -0,0 +1,163 @@ | |||||
/** | |||||
* Copyright 2021 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include <gtest/gtest.h> | |||||
#include <vector> | |||||
#include "runtime/rt.h" | |||||
#define protected public | |||||
#define private public | |||||
#include "single_op/single_op.h" | |||||
#include "single_op/single_op_manager.h" | |||||
#undef private | |||||
#undef protected | |||||
using namespace std; | |||||
using namespace ge; | |||||
class UtestSingleOp : public testing::Test { | |||||
protected: | |||||
void SetUp() {} | |||||
void TearDown() {} | |||||
}; | |||||
TEST_F(UtestSingleOp, test_dynamic_singleop_execute_async) { | |||||
uintptr_t resource_id = 0; | |||||
std::mutex stream_mu; | |||||
rtStream_t stream = nullptr; | |||||
rtStreamCreate(&stream, 0); | |||||
DynamicSingleOp dynamic_single_op(resource_id, &stream_mu, stream); | |||||
vector<int64_t> dims_vec_0 = {2}; | |||||
vector<GeTensorDesc> input_desc; | |||||
GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32); | |||||
// input data from device | |||||
AttrUtils::SetInt(tensor_desc_0, ATTR_NAME_PLACEMENT, 0); | |||||
input_desc.emplace_back(tensor_desc_0); | |||||
vector<DataBuffer> input_buffers; | |||||
ge::DataBuffer data_buffer; | |||||
data_buffer.data = new char[4]; | |||||
data_buffer.length = 4; | |||||
input_buffers.emplace_back(data_buffer); | |||||
vector<GeTensorDesc> output_desc; | |||||
vector<DataBuffer> output_buffers; | |||||
// UpdateRunInfo failed | |||||
EXPECT_EQ(dynamic_single_op.ExecuteAsync(input_desc, input_buffers, output_desc, output_buffers), ACL_ERROR_GE_PARAM_INVALID); | |||||
} | |||||
TEST_F(UtestSingleOp, test_dynamic_singleop_execute_async1) { | |||||
uintptr_t resource_id = 0; | |||||
std::mutex stream_mu; | |||||
rtStream_t stream = nullptr; | |||||
rtStreamCreate(&stream, 0); | |||||
DynamicSingleOp dynamic_single_op(resource_id, &stream_mu, stream); | |||||
dynamic_single_op.num_inputs_ = 1; | |||||
vector<int64_t> dims_vec_0 = {2}; | |||||
vector<GeTensorDesc> input_desc; | |||||
GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32); | |||||
// input data from host | |||||
AttrUtils::SetInt(tensor_desc_0, ATTR_NAME_PLACEMENT, 1); | |||||
input_desc.emplace_back(tensor_desc_0); | |||||
int64_t input_size = 0; | |||||
EXPECT_EQ(TensorUtils::GetTensorMemorySizeInBytes(tensor_desc_0, input_size), SUCCESS); | |||||
EXPECT_EQ(input_size, 64); | |||||
EXPECT_NE(SingleOpManager::GetInstance().GetResource(resource_id, stream), nullptr); | |||||
vector<DataBuffer> input_buffers; | |||||
ge::DataBuffer data_buffer; | |||||
data_buffer.data = new char[4]; | |||||
data_buffer.length = 4; | |||||
input_buffers.emplace_back(data_buffer); | |||||
vector<GeTensorDesc> output_desc; | |||||
vector<DataBuffer> output_buffers; | |||||
auto *tbe_task = new (std::nothrow) TbeOpTask(); | |||||
ge::OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||||
ge::ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||||
ge::NodePtr node = graph->AddNode(op_desc); | |||||
tbe_task->node_ = node; | |||||
dynamic_single_op.op_task_.reset((OpTask *)(tbe_task)); | |||||
OpDescPtr desc_ptr = MakeShared<OpDesc>("name1", "type1"); | |||||
EXPECT_EQ(desc_ptr->AddInputDesc("x", GeTensorDesc(GeShape({2}), FORMAT_NCHW)), GRAPH_SUCCESS); | |||||
dynamic_single_op.op_task_->op_desc_ = desc_ptr; | |||||
// UpdateRunInfo failed | |||||
EXPECT_EQ(dynamic_single_op.ExecuteAsync(input_desc, input_buffers, output_desc, output_buffers), PARAM_INVALID); | |||||
} | |||||
TEST_F(UtestSingleOp, test_singleop_execute_async1) { | |||||
StreamResource *res = new (std::nothrow) StreamResource(1); | |||||
std::mutex stream_mu; | |||||
rtStream_t stream = nullptr; | |||||
rtStreamCreate(&stream, 0); | |||||
SingleOp single_op(res, &stream_mu, stream); | |||||
vector<DataBuffer> input_buffers; | |||||
ge::DataBuffer data_buffer; | |||||
data_buffer.data = new char[4]; | |||||
data_buffer.length = 4; | |||||
data_buffer.placement = 1; | |||||
input_buffers.emplace_back(data_buffer); | |||||
vector<DataBuffer> output_buffers; | |||||
single_op.input_sizes_.emplace_back(4); | |||||
SingleOpModelParam model_params; | |||||
single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params)); | |||||
single_op.args_.resize(1); | |||||
EXPECT_EQ(single_op.hybrid_model_executor_, nullptr); | |||||
EXPECT_EQ(single_op.running_param_->mem_base, nullptr); | |||||
EXPECT_EQ(single_op.tasks_.size(), 0); | |||||
EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), SUCCESS); | |||||
} | |||||
TEST_F(UtestSingleOp, test_singleop_execute_async2) { | |||||
StreamResource *res = new (std::nothrow) StreamResource(1); | |||||
std::mutex stream_mu; | |||||
rtStream_t stream = nullptr; | |||||
rtStreamCreate(&stream, 0); | |||||
SingleOp single_op(res, &stream_mu, stream); | |||||
vector<DataBuffer> input_buffers; | |||||
ge::DataBuffer data_buffer; | |||||
data_buffer.data = new char[4]; | |||||
data_buffer.length = 4; | |||||
data_buffer.placement = 1; | |||||
input_buffers.emplace_back(data_buffer); | |||||
vector<DataBuffer> output_buffers; | |||||
single_op.input_sizes_.emplace_back(4); | |||||
SingleOpModelParam model_params; | |||||
single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params)); | |||||
single_op.args_.resize(1); | |||||
GeTensorDesc tensor_desc(GeShape({1}), FORMAT_NHWC, DT_UINT64); | |||||
single_op.inputs_desc_.emplace_back(tensor_desc); | |||||
std::shared_ptr<ge::GeRootModel> root_model = ge::MakeShared<ge::GeRootModel>(); | |||||
single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model)); | |||||
single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(), 0, stream)); | |||||
EXPECT_EQ(single_op.running_param_->mem_base, nullptr); | |||||
EXPECT_EQ(single_op.tasks_.size(), 0); | |||||
EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), PARAM_INVALID); | |||||
} |