@@ -195,6 +195,7 @@ set(TRAIN_SRC_LIST
  "graph/passes/atomic_addr_clean_pass.cc"
  "graph/passes/mark_same_addr_pass.cc"
  "graph/passes/mark_graph_unknown_status_pass.cc"
  "graph/passes/mark_node_unknown_shape_pass.cc"
  "graph/passes/mark_agnostic_pass.cc"
  "graph/partition/dynamic_shape_partition.cc"
  "graph/partition/stage_partition.cc"

@@ -509,6 +510,7 @@ set(INFER_SRC_LIST
  "graph/passes/atomic_addr_clean_pass.cc"
  "graph/passes/mark_same_addr_pass.cc"
  "graph/passes/mark_graph_unknown_status_pass.cc"
  "graph/passes/mark_node_unknown_shape_pass.cc"
  "graph/passes/mark_agnostic_pass.cc"
  "graph/common/omg_util.cc"
  "graph/common/bcast.cc"

@@ -114,6 +114,7 @@ OMG_HOST_SRC_FILES := \
    graph/passes/atomic_addr_clean_pass.cc \
    graph/passes/mark_same_addr_pass.cc \
    graph/passes/mark_graph_unknown_status_pass.cc \
    graph/passes/mark_node_unknown_shape_pass.cc \
    graph/passes/mark_agnostic_pass.cc \
    graph/common/omg_util.cc \
    graph/common/bcast.cc \

@@ -114,6 +114,7 @@ LIBGE_LOCAL_SRC_FILES := \
    graph/passes/atomic_addr_clean_pass.cc \
    graph/passes/mark_same_addr_pass.cc \
    graph/passes/mark_graph_unknown_status_pass.cc \
    graph/passes/mark_node_unknown_shape_pass.cc \
    graph/passes/mark_agnostic_pass.cc \
    graph/partition/dynamic_shape_partition.cc \
    graph/partition/stage_partition.cc \

@@ -53,6 +53,7 @@ constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
const int64_t kDynamicDimValue = -2;
const int kDefaultDeviceId = 0;
const int kDefaultJobId = 0;
const int32_t kFuzzBuildPattern = 1;
std::map<ge::OpEngineType, std::string> engine_type_map{
    {ge::ENGINE_SYS, kEngineNameDefault},

@@ -296,13 +297,44 @@ static Status ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTenso
  return SUCCESS;
}

static Status GetFuzzBuildAttrs(const OpDescPtr &op_desc, const GeRootModelPtr &ge_root_model,
                                GeAttrValue::LIST_NAMED_ATTRS &fuzz_build_attrs) {
  GELOGD("Start to get fuzz build attrs of %s.", op_desc->GetName().c_str());
  GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
  for (const auto &node : ge_root_model->GetRootGraph()->GetAllNodes()) {
    GE_CHECK_NOTNULL(node);
    GE_CHECK_NOTNULL(node->GetOpDesc());
    GELOGD("Delete fuzz build attr of %s after build.", node->GetName().c_str());
    node->GetOpDesc()->DelAttr(ATTR_NAME_FUZZ_BUILD);
  }
  (void)AttrUtils::GetListNamedAttrs(op_desc, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs);
  if (!fuzz_build_attrs.empty()) {
    GELOGD("%s has split, get ATTR_NAME_FUZZ_BUILD_RES_ATTRS directly.", op_desc->GetName().c_str());
    return SUCCESS;
  } else {
    GELOGW("%s was built with the fuzz build pattern, but ATTR_NAME_FUZZ_BUILD_RES_ATTRS is not set.",
           op_desc->GetName().c_str());
  }
  return SUCCESS;
}

static bool HasShapeRange(const vector<GeTensor> &inputs) {
  for (const auto &input : inputs) {
    vector<pair<int64_t, int64_t>> shape_range;
    (void)input.GetTensorDesc().GetShapeRange(shape_range);
    if (!shape_range.empty()) {
      GELOGD("Shape range has been set.");
      return true;
    }
  }
  return false;
}
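
For reference, a minimal sketch of the condition this helper checks, assuming the GeTensor/GeTensorDesc APIs used elsewhere in this file; an input that already carries an explicit shape range counts as dynamic, so the fuzz build pattern is not applied to it:

    // Sketch only: an input with a shape range is treated as already dynamic,
    // so BuildSingleOp below keeps fuzz_compile_flag false for such inputs.
    GeTensorDesc desc(GeShape({-1, 224}), FORMAT_ND, DT_FLOAT);
    (void)desc.SetShapeRange({{1, 64}, {224, 224}});  // one {min, max} pair per dim
    std::vector<GeTensor> inputs{GeTensor(desc)};
    // HasShapeRange(inputs) == true -> compile_flag == kFuzzBuildPattern is ignored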
class GeGenerator::Impl {
 public:
  Impl(OmgContext &omg_context) : omg_context_(omg_context) {}
  ~Impl() = default;

  Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GeRootModelPtr &ge_models);
  Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model);
  Status SaveRootModel(const string &file_name_prefix, GeRootModelPtr &model, ModelBufferData &model_buff);

@@ -742,7 +774,8 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor>
Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs,
                                  const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff,
                                  bool is_offline) {
                                  bool is_offline, int32_t compile_flag) {
  GELOGD("Inputs size is %zu, outputs size is %zu.", inputs.size(), outputs.size());
  GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
  impl_->is_offline_ = is_offline;
  if (!is_offline) {

@@ -764,6 +797,16 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
  OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc);
  GE_CHECK_NOTNULL(op_desc_tmp);

  bool fuzz_compile_flag = false;
  if (!HasShapeRange(inputs) && compile_flag == kFuzzBuildPattern) {
    fuzz_compile_flag = true;
  }
  if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, fuzz_compile_flag)) {
    GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD] Failed to set attr for %s.", op_desc->GetName().c_str());
    return FAILED;
  }
  impl_->omg_context_.fuzz_compile_flag = fuzz_compile_flag;

  // 1. Create ComputeGraph.
  string name = ge::CurrentTimeInStr() + "_" + model_file_name;
  Graph graph;

@@ -810,6 +853,19 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
    GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(outputs, outputs_dynamic));
    GE_CHK_STATUS_RET_NOLOG(
        impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic));
  } else if (fuzz_compile_flag) {
    GELOGD("Get fuzz build result of %s.", op_desc->GetName().c_str());
    (void)AttrUtils::SetInt(ge_model, ATTR_NAME_BUILD_MODE, fuzz_compile_flag);
    GeAttrValue::LIST_NAMED_ATTRS fuzz_build_attrs;
    if (GetFuzzBuildAttrs(op_desc, ge_root_model, fuzz_build_attrs) != SUCCESS) {
      GELOGE(FAILED, "[Get][FuzzRet] Failed to get fuzz build result of %s.", op_desc->GetName().c_str());
      return FAILED;
    }
    if (!fuzz_build_attrs.empty()) {
      GE_CHK_BOOL_EXEC(AttrUtils::SetListNamedAttrs(ge_model, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs),
                       return FAILED, "Set ATTR_NAME_FUZZ_BUILD_RES_ATTRS failed.");
    }
    GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));
  } else {
    GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));
  }

@@ -825,15 +881,17 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
 * @param [in] vector<GeTensor> &inputs: Operator input data description information.
 * @param [in] vector<GeTensor> &outputs: Operator output data description information.
 * @param [in] const string &model_file_name: Offline model filename.
 * @param [in] compile_flag: Operator build flag from ATC.
 * @return SUCCESS handle successfully / others handle failed
 */
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
                                       const vector<GeTensor> &outputs, const string &model_file_name) {
                                       const vector<GeTensor> &outputs, const string &model_file_name,
                                       int32_t compile_flag) {
  ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther);
  GELOGI("Start to build single op offline model, input size: %zu, output size: %zu", inputs.size(), outputs.size());
  ModelBufferData model_buff;
  OpEngineType engine_type = ENGINE_SYS;
  Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true);
  Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true, compile_flag);
  GELOGI("Finish build single offline model, status: %u", status);
  return status;
}
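
A hedged usage sketch of the extended offline entry point, assuming a GeGenerator that has been set up via GeGenerator::Initialize and a populated op_desc/inputs/outputs as elsewhere in this file (kFuzzBuildPattern == 1 per the constant above):

    // Sketch: compile_flag = 1 requests the fuzz build pattern. If no input
    // carries a shape range, ATTR_NAME_FUZZ_BUILD is set on the op and the
    // resulting model keeps ATTR_NAME_FUZZ_BUILD_RES_ATTRS for generalization.
    GeGenerator generator;
    Status ret = generator.BuildSingleOpModel(op_desc, inputs, outputs, "add_fuzz", 1);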
@@ -850,7 +908,6 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor
 * @return SUCCESS handle successfully / others handle failed
 */
// Old process; will be deleted.
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
                                       const vector<GeTensor> &outputs, OpEngineType engine_type,
                                       ModelBufferData &model_buff) {

@@ -864,7 +921,12 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
                                       const vector<GeTensor> &outputs, OpEngineType engine_type, int32_t compile_flag,
                                       ModelBufferData &model_buff) {
  return SUCCESS;
  ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther);
  GELOGI("Start to build single op online, input size: %zu, output size: %zu", inputs.size(), outputs.size());
  Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false,
                                compile_flag);
  GELOGI("Finish build single online model, status: %u", status);
  return status;
}

Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs,

@@ -61,6 +61,7 @@
#include "graph/passes/iterator_op_pass.h"
#include "graph/passes/link_gen_mask_nodes_pass.h"
#include "graph/passes/mark_graph_unknown_status_pass.h"
#include "graph/passes/mark_node_unknown_shape_pass.h"
#include "graph/passes/merge_pass.h"
#include "graph/passes/merge_input_memcpy_pass.h"
#include "graph/passes/merge_to_stream_merge_pass.h"

@@ -864,6 +865,8 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
  }
  ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kPrepareOptimize);

  // Set the fuzz compile flag after the original graph has been optimized.
  GE_CHK_STATUS_RET(SetFuzzCompileFlag(compute_graph), "Set fuzz compile flag failed.");
  ret = PreRunOptimizeSubGraph(graph_node, compute_graph, session_id);
  if (ret != SUCCESS) {
    GELOGE(ret, "Run PreRunOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str());

@@ -878,7 +881,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
                                     options_.build_step == BUILD_STEP_AFTER_BUILDER ||
                                     options_.build_step == BUILD_STEP_AFTER_BUILDER_SUB));
  if (run_after_optimize_subgraph) {
    Status ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id);
    ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id);
    if (ret != SUCCESS) {
      GELOGE(ret, "Run PreRunAfterOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str());
      return ret;

@@ -896,6 +899,22 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
  return SUCCESS;
}

Status GraphManager::SetFuzzCompileFlag(ComputeGraphPtr &compute_graph) {
  if (!GetLocalOmgContext().fuzz_compile_flag) {
    return SUCCESS;
  }
  for (const auto &node : compute_graph->GetAllNodes()) {
    OpDescPtr op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    GELOGD("Fuzz compile flag is %d.", GetLocalOmgContext().fuzz_compile_flag);
    if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, GetLocalOmgContext().fuzz_compile_flag)) {
      GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD] Failed to set fuzz build attr to %s.", op_desc->GetName().c_str());
      return FAILED;
    }
  }
  return SUCCESS;
}

Status GraphManager::SubexpressionMigration(ComputeGraphPtr &compute_graph) {
  PassManager pass_manager;
  GE_CHK_STATUS_RET(pass_manager.AddPass("SubexpressionMigrationPass", new (std::nothrow) SubexpressionMigrationPass));

@@ -2488,6 +2507,8 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {
  GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::CompileNodesPass",
                                                           new (std::nothrow) CompileNodesPass))
  GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass(
      "OptimizeStage2::AfterMergePasses::MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass))
  GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass(
      "OptimizeStage2::AfterMergePasses::MarkGraphUnknownStatusPass", new (std::nothrow) MarkGraphUnknownStatusPass))
  GE_CHK_STATUS_RET(
      pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::InputOutputConnectionIdentifyPass",

@@ -358,6 +358,7 @@ class GraphManager {
                                      ComputeGraphPtr &compute_graph,
                                      GeRootModelPtr &ge_root_model,
                                      uint64_t session_id);
  Status SetFuzzCompileFlag(ComputeGraphPtr &compute_graph);
  Status CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_graph,
                                   Graph2SubGraphInfoList &sub_graph_map,

@@ -0,0 +1,99 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "graph/passes/mark_node_unknown_shape_pass.h"
#include "graph/utils/node_utils.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/common/local_context.h"

namespace ge {
namespace {
const char *const kEngineNameAiCore = "AIcoreEngine";
const char *const kNeedRefreshShape = "_need_generate";
const char *const kOriginalNode = "_original_node";
const int32_t kDynamicState = -2;
}  // namespace

Status MarkNodeUnknownShapePass::Run(ComputeGraphPtr graph) {
  GE_CHECK_NOTNULL(graph);
  if (!GetLocalOmgContext().fuzz_compile_flag) {
    return SUCCESS;
  }
  if (IsAllAicoreSupportDyn(graph)) {
    if (UpdateNodeShapeToUnknown(graph) != SUCCESS) {
      GELOGE(FAILED, "[Update][Node_Shape] Failed to update node shape to unknown.");
      return FAILED;
    }
  }
  return SUCCESS;
}

bool MarkNodeUnknownShapePass::IsAllAicoreSupportDyn(ComputeGraphPtr &graph) {
  bool is_all_aicore_support_dyn = false;
  for (const auto &node : graph->GetAllNodes()) {
    if (node->GetOpDesc() == nullptr) {
      continue;
    }
    if (node->GetOpDesc()->GetOpKernelLibName() != kEngineNameAiCore) {
      GELOGD("Kernel of %s is %s.", node->GetName().c_str(), node->GetOpDesc()->GetOpKernelLibName().c_str());
      continue;
    }
    NodePtr original_node = nullptr;
    original_node = node->GetOpDesc()->TryGetExtAttr(kOriginalNode, original_node);
    if ((original_node == nullptr && AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS)) ||
        (original_node != nullptr && AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS) &&
         !AttrUtils::HasAttr(original_node->GetOpDesc(), kNeedRefreshShape))) {
      GELOGD("%s has set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str());
      is_all_aicore_support_dyn = true;
    } else {
      GELOGD("%s has not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str());
      is_all_aicore_support_dyn = false;
      break;
    }
  }
  return is_all_aicore_support_dyn;
}

Status MarkNodeUnknownShapePass::UpdateNodeShapeToUnknown(ComputeGraphPtr &graph) {
  GELOGD("Need to update node shapes to dynamic once the fuzz build result is obtained.");
  for (const auto &node : graph->GetAllNodes()) {
    if (NodeUtils::IsConst(*node) || node->GetType() == VARIABLE) {
      continue;
    }
    auto op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
      auto src_node = NodeUtils::GetInDataNodeByIndex(*node, static_cast<int>(i));
      if (src_node != nullptr && (NodeUtils::IsConst(*src_node) || src_node->GetType() == VARIABLE)) {
        continue;
      }
      GELOGD("Update input shape for %s.", node->GetName().c_str());
      auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
      if (input_desc != nullptr) {
        input_desc->SetShape(GeShape({kDynamicState}));
      }
    }
    for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) {
      if (output_desc != nullptr) {
        GELOGD("Update output shape for %s.", node->GetName().c_str());
        output_desc->SetShape(GeShape({kDynamicState}));
      }
    }
  }
  return SUCCESS;
}
}  // namespace ge
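
The pass rewrites every non-const, non-variable tensor to the unknown-rank sentinel {-2} (kDynamicState above). A minimal illustration of that convention: -2 marks unknown rank, while -1 marks a single unknown dimension:

    // Sketch: after this pass, a tensor that was {8, 224, 224, 3} becomes {-2}.
    GeShape unknown_rank({-2});  // kDynamicState: the rank itself is unknown
    // MarkGraphUnknownStatusPass (registered right after this pass in
    // OptimizeStage2) then treats the graph as dynamic, which is what lets one
    // fuzz-built kernel serve arbitrary input shapes.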
@@ -0,0 +1,32 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_
#define GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_

#include "graph/graph.h"
#include "inc/graph_pass.h"

namespace ge {
class MarkNodeUnknownShapePass : public GraphPass {
 public:
  Status Run(ComputeGraphPtr graph);

 private:
  bool IsAllAicoreSupportDyn(ComputeGraphPtr &graph);
  Status UpdateNodeShapeToUnknown(ComputeGraphPtr &graph);
};
}  // namespace ge
#endif  // GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_
@@ -55,9 +55,17 @@ Status InsertReshapeIfNeed(const NodePtr &node) {
      GE_CHECK_NOTNULL(dst_node->GetOpDesc());
      auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx());
      GE_CHECK_NOTNULL(dst_tensor);
      bool is_need_insert_reshape = src_tensor->GetShape().GetDims() != UNKNOWN_RANK &&
                                    dst_tensor->GetShape().GetDims() != UNKNOWN_RANK &&
                                    src_tensor->GetShape().GetDims() != dst_tensor->GetShape().GetDims();
      bool is_dynamic = false;
      const auto &src_tensor_dims = src_tensor->GetShape().GetDims();
      const auto &dst_tensor_dims = dst_tensor->GetShape().GetDims();
      if (std::any_of(src_tensor_dims.begin(), src_tensor_dims.end(), [](int64_t val) { return val < 0; }) ||
          std::any_of(dst_tensor_dims.begin(), dst_tensor_dims.end(), [](int64_t val) { return val < 0; })) {
        GELOGD("No need to insert reshape node between %s and %s.", node->GetName().c_str(),
               dst_node->GetName().c_str());
        is_dynamic = true;
      }
      bool is_need_insert_reshape = src_tensor_dims != dst_tensor_dims && !is_dynamic;
      if (is_need_insert_reshape) {
        auto reshape = CreateReshape(src_tensor, dst_tensor, node->GetOwnerComputeGraph());
        GE_CHECK_NOTNULL(reshape);
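
The new guard reads as a small predicate: insert a Reshape only when both tensors are fully static and their dims differ; any negative dim (-1 unknown dim, -2 unknown rank) marks the edge as dynamic. A standalone sketch of that predicate, with a hypothetical helper name and plain int64_t dims:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Sketch of the check above: any negative dim means dynamic, so no Reshape.
    static bool NeedReshape(const std::vector<int64_t> &src, const std::vector<int64_t> &dst) {
      auto is_dynamic = [](const std::vector<int64_t> &dims) {
        return std::any_of(dims.begin(), dims.end(), [](int64_t v) { return v < 0; });
      };
      return !is_dynamic(src) && !is_dynamic(dst) && src != dst;
    }
    // NeedReshape({2, 3}, {3, 2}) == true; NeedReshape({-1, 3}, {3, 2}) == false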
@@ -54,6 +54,7 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
                      "[%s] check input node shape by shape range failed.",
                      root_graph_item->GetName().c_str());
  }

  if (context_.global_step != nullptr) {
    GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration,
                                sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream));

@@ -100,8 +101,10 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
    GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 1, stream_, device_id));
  }

  HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph.");
  RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End");
  if (!model_->IsSingleOp()) {
    HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph.");
    RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End");
  }

  args.outputs.clear();
  HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs");

@@ -168,7 +168,7 @@ Status NodeItem::InitInputsAndOutputs() {
Status NodeItem::ResolveDynamicState() {
  (void)AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic);
  GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic);
  GELOGD("Node name is %s, dynamic state is %d.", this->node_name.c_str(), is_dynamic);
  if (!is_dynamic) {
    GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic),
                      "[%s] Failed to get shape status.",

@@ -22,6 +22,7 @@
#include "hybrid/node_executor/aicore/aicore_task_builder.h"
#include "graph/load/model_manager/tbe_handle_store.h"
#include "graph/types.h"
#include "single_op/task/build_task_utils.h"

using optiling::OpRunInfo;

@@ -31,6 +32,7 @@ namespace {
constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
constexpr char const *kAttrOpParamSize = "op_para_size";
constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size";
std::atomic<std::uint64_t> log_id(0);
}  // namespace

TbeHandleHolder::TbeHandleHolder(void *bin_handle)

@@ -48,6 +50,12 @@ bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) {
}

Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) {
  log_name_ = op_desc.GetName() + "_tvmbin";
  log_id_ = log_id++;
  auto op_desc_ptr = MakeShared<OpDesc>(op_desc);
  GE_CHECK_NOTNULL(op_desc_ptr);
  auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_ptr);
  GELOGI("[TASK_INFO] %lu/%s %s.", log_id_, log_name_.c_str(), task_info.c_str());
  GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def));
  GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc));

@@ -67,6 +75,7 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def)
      output_indices_to_skip_.push_back(i);
    }
  }
  GELOGI("[TASK_INFO] %lu/%s.", log_id_, log_name_.c_str());
  return SUCCESS;
}

@@ -114,6 +114,8 @@ class AiCoreOpTask {
  uint32_t tiling_key_ = 0;
  void *handle_ = nullptr;
  bool is_dynamic_ = false;
  uint64_t log_id_ = 0;
  std::string log_name_;
};

class AtomicAddrCleanOpTask : public AiCoreOpTask {
@@ -216,6 +216,10 @@ DEFINE_string(op_bank_path, "", "Optional; op bank path");
DEFINE_string(display_model_info, "0", "Optional; display model info");

DEFINE_string(performance_mode, "", "Optional; express high compile performance or high execute performance."
              "normal: no need to compile, use saved .o files directly;"
              "high: need to recompile, high execute performance mode.");

class GFlagUtils {
 public:
  /**

@@ -330,7 +334,8 @@ class GFlagUtils {
      "Default value: $HOME/atc_data\n"
      "  --op_compiler_cache_mode Set the operator compilation cache mode."
      "Options are disable(default), enable and force(force to refresh the cache)\n"
      "  --display_model_info enable for display model info; 0(default): close display, 1: open display");
      "  --display_model_info enable for display model info; 0(default): close display, 1: open display.\n"
      "  --performance_mode Set high performance mode of compile or execute.");
  gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
  // Using gflags to analyze input parameters

@@ -1078,6 +1083,7 @@ static void SetEnvForSingleOp(std::map<string, string> &options) {
  options.emplace(ge::OP_COMPILER_CACHE_MODE, FLAGS_op_compiler_cache_mode);
  options.emplace(ge::MDL_BANK_PATH_FLAG, FLAGS_mdl_bank_path);
  options.emplace(ge::OP_BANK_PATH_FLAG, FLAGS_op_bank_path);
  options.emplace(ge::PERFORMANCE_MODE, FLAGS_performance_mode);
}

domi::Status GenerateSingleOp(const std::string& json_file_path) {

@@ -1124,7 +1130,7 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) {
      output_path = FLAGS_output + "/";
    }
    output_path += param.file_name;
    ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path);
    ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path, param.compile_flag);
    if (ret != SUCCESS) {
      DOMI_LOGE("Compile op failed. ge ret = %u, op index = %d", ret, index);
      ret = domi::FAILED;

@@ -1229,6 +1235,8 @@ domi::Status GenerateOmModel() {
  options.insert(std::pair<string, string>(string(ge::OP_BANK_PATH_FLAG), FLAGS_op_bank_path));
  options.insert(std::pair<string, string>(string(ge::DISPLAY_MODEL_INFO), FLAGS_display_model_info));

  options.insert(std::pair<string, string>(string(ge::PERFORMANCE_MODE), FLAGS_performance_mode));

  // set enable scope fusion passes
  SetEnableScopeFusionPasses(FLAGS_enable_scope_fusion_passes);
  // print atc option map

@@ -53,6 +53,7 @@ constexpr char const *kKeyOriginFormat = "origin_format";
constexpr char const *kFileSuffix = ".om";
constexpr char const *kKeyDynamicInput = "dynamic_input";
constexpr char const *kKeyDynamicOutput = "dynamic_output";
constexpr char const *kKeyCompileFlag = "compile_flag";
constexpr int kDumpJsonIndent = 2;
constexpr int kShapeRangePairSize = 2;
constexpr int kShapeRangeLow = 0;

@@ -265,7 +266,10 @@ void from_json(const Json &j, SingleOpAttr &attr) {
}

void from_json(const Json &j, SingleOpDesc &desc) {
  desc.op = j.at(kKeyOp).get<string>();
  auto op = j.find(kKeyOp);
  if (op != j.end()) {
    desc.op = j.at(kKeyOp).get<string>();
  }

  auto input_desc = j.find(kKeyInputDesc);
  if (input_desc != j.end()) {

@@ -281,6 +285,11 @@ void from_json(const Json &j, SingleOpDesc &desc) {
  if (attr_field != j.end()) {
    desc.attrs = attr_field->get<vector<SingleOpAttr>>();
  }

  auto compile_flag = j.find(kKeyCompileFlag);
  if (compile_flag != j.end()) {
    desc.compile_flag = compile_flag->get<int32_t>();
  }
}
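
Making kKeyOp optional is what lets a description entry carry only a compile flag. A hedged sketch of how such an entry parses, assuming this file's Json alias for nlohmann::json and the SingleOpDesc struct shown further down:

    // Sketch: a flag-only entry in the single-op description list.
    Json single_op_json = Json::parse(R"({"compile_flag": 1})");
    SingleOpDesc single_op_desc;
    single_op_desc = single_op_json;  // dispatches to from_json above
    // single_op_desc.compile_flag == 1; per ParseSingleOpList below, this entry
    // turns on fuzz build for the whole list and is itself skipped, since it
    // defines no op.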
Status SingleOpParser::ReadJsonFile(const std::string &file, Json &json_obj) {

@@ -583,10 +592,16 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si
    return ret;
  }

  int32_t compile_flag = 0;
  for (const Json &single_op_json : single_op_list_json) {
    SingleOpDesc single_op_desc;
    GELOGI("Parsing op[%d], jsonStr = %s", index, single_op_json.dump(kDumpJsonIndent).c_str());
    single_op_desc = single_op_json;
    GELOGD("Compile flag is %d.", single_op_desc.compile_flag);
    if (single_op_desc.compile_flag == 1) {
      compile_flag = single_op_desc.compile_flag;
      continue;
    }

    if (UpdateDynamicTensorName(single_op_desc.input_desc) != SUCCESS) {
      GELOGE(FAILED, "[Update][DynamicTensorName] failed for invalid input param!");
      REPORT_CALL_ERROR("E19999", "UpdateDynamicTensorName failed for invalid input param.");

@@ -604,6 +619,7 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si
    if (ret != SUCCESS) {
      return ret;
    }
    param.compile_flag = compile_flag;

    op_list.emplace_back(param);
    GELOGI("Parse the index[%d] of op success", index);

@@ -55,6 +55,7 @@ struct SingleOpDesc {
  std::vector<SingleOpTensorDesc> input_desc;
  std::vector<SingleOpTensorDesc> output_desc;
  std::vector<SingleOpAttr> attrs;
  int32_t compile_flag = 0;
};

struct SingleOpBuildParam {

@@ -62,6 +63,7 @@ struct SingleOpBuildParam {
  std::vector<ge::GeTensor> inputs;
  std::vector<ge::GeTensor> outputs;
  std::string file_name;
  int32_t compile_flag = 0;
};

void from_json(const nlohmann::json &json, SingleOpTensorDesc &desc);

@@ -34,6 +34,9 @@ const size_t kDataMemAlignSize = 32;
const size_t kDataMemAlignUnit = 2;
const string kShapeTypeDynamic = "dynamic";
const string kShapeTypeStatic = "static";
const int64_t kHostMemType = 1;
const uint32_t kFuzzDeviceBufferSize = 1 * 1024 * 1024;
const uint32_t kAlignBytes = 512;

size_t GetAlignedSize(size_t size) {
  size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize;

@@ -65,6 +68,72 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) {
  profiling_manager.ReportProfilingData(model_id, task_desc_info);
  return SUCCESS;
}

Status CalInputsHostMemSize(const std::vector<DataBuffer> &inputs,
                            std::vector<std::pair<size_t, uint64_t>> &inputs_size) {
  int64_t total_size = 0;
  size_t index = 0;
  for (auto &input_buffer : inputs) {
    int64_t input_size = 0;
    if (input_buffer.placement == kHostMemType) {
      GE_CHECK_LE(input_buffer.length, INT64_MAX);
      input_size = input_buffer.length;
      // Pad input_size up to a kAlignBytes (512-byte) boundary.
      GE_CHK_STATUS_RET(CheckInt64AddOverflow(input_size, (kAlignBytes - 1)), "Padding size is beyond the INT64_MAX.");
      input_size = ((input_size + kAlignBytes - 1) / kAlignBytes) * kAlignBytes;
      inputs_size.emplace_back(index, input_size);
      GE_CHK_STATUS_RET(CheckInt64AddOverflow(total_size, input_size), "Total size is beyond the INT64_MAX.");
      total_size += input_size;
      GELOGD("The %zu input mem type is host, tensor size is %ld.", index, input_size);
    }
    index++;
  }
  if (total_size > kFuzzDeviceBufferSize) {
    GELOGE(FAILED, "[Check][Size] Total size is %ld, larger than 1M.", total_size);
    return FAILED;
  }
  return SUCCESS;
}
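
The 512-byte padding above is ordinary round-up arithmetic. A self-contained sketch of the same computation, checkable at compile time:

    #include <cstdint>

    // Sketch of the padding used above: round size up to a 512-byte boundary.
    constexpr int64_t kAlign = 512;
    constexpr int64_t PadTo512(int64_t size) {
      return ((size + kAlign - 1) / kAlign) * kAlign;
    }
    static_assert(PadTo512(1) == 512, "1 byte pads to one 512-byte block");
    static_assert(PadTo512(512) == 512, "exact multiples are unchanged");
    static_assert(PadTo512(513) == 1024, "one byte over spills into a new block");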
Status UpdateInputsBufferAddr(StreamResource *stream_resource, rtStream_t stream,
                              const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
                              std::vector<DataBuffer> &update_buffers) {
  GE_CHECK_NOTNULL(stream_resource);
  if (stream_resource->Init() != SUCCESS) {
    GELOGE(FAILED, "[Malloc][Memory] Failed to malloc device buffer.");
    return FAILED;
  }
  auto dst_addr = reinterpret_cast<uint8_t *>(stream_resource->GetDeviceBufferAddr());
  // Copy host memory from each input buffer to the device memory at dst_addr.
  for (const auto &input_size : inputs_size) {
    size_t index = input_size.first;
    auto size = input_size.second;
    GELOGD("Do H2D for %zu input, dst size is %zu, src length is %lu.", index, size, update_buffers[index].length);
    GE_CHK_RT_RET(rtMemcpyAsync(dst_addr, size, update_buffers[index].data, update_buffers[index].length,
                                RT_MEMCPY_HOST_TO_DEVICE_EX, stream));
    update_buffers[index].data = dst_addr;
    dst_addr = reinterpret_cast<uint8_t *>(dst_addr + size);
  }
  return SUCCESS;
}

Status InitHybridModelArgs(const std::vector<DataBuffer> &input_buffers,
                           const std::vector<DataBuffer> &output_buffers,
                           const std::vector<GeTensorDesc> &inputs_desc,
                           hybrid::HybridModelExecutor::ExecuteArgs &args) {
  for (auto &input : input_buffers) {
    args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length));
  }
  for (auto &output : output_buffers) {
    args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length));
  }
  for (auto &tensor_desc : inputs_desc) {
    auto desc = MakeShared<GeTensorDesc>(tensor_desc);
    GE_CHECK_NOTNULL(desc);
    args.input_desc.emplace_back(desc);
  }
  return SUCCESS;
}
}  // namespace

SingleOp::SingleOp(StreamResource *stream_resource, std::mutex *stream_mutex, rtStream_t stream)

@@ -168,13 +237,28 @@ Status SingleOp::UpdateArgs(const std::vector<DataBuffer> &inputs, const std::ve
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(const std::vector<DataBuffer> &inputs,
                                                                               const std::vector<DataBuffer> &outputs) {
  GELOGD("Start SingleOp::ExecuteAsync.");
  Status ret = ValidateArgs(inputs, outputs);
  if (ret != SUCCESS) {
    return ret;
  }

  GE_CHECK_NOTNULL(stream_resource_);
  vector<pair<size_t, uint64_t>> inputs_size;
  GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(inputs, inputs_size));
  std::lock_guard<std::mutex> lk(*stream_mutex_);
  vector<DataBuffer> update_buffers = inputs;
  if (!inputs_size.empty()) {
    GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource_, stream_, inputs_size, update_buffers));
  }

  if (hybrid_model_executor_ != nullptr) {
    GELOGD("Execute multi-task single op by hybrid model executor");
    hybrid::HybridModelExecutor::ExecuteArgs args;
    GE_CHK_STATUS_RET_NOLOG(InitHybridModelArgs(update_buffers, outputs, inputs_desc_, args));
    return hybrid_model_executor_->Execute(args);
  }

  auto current_mem_base = stream_resource_->GetMemoryBase();
  if (running_param_->mem_base != current_mem_base) {
    running_param_->mem_base = const_cast<uint8_t *>(current_mem_base);

@@ -185,7 +269,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c
             task->GetOpdesc()->GetName().c_str());
    }
  }

  ret = UpdateArgs(update_buffers, outputs);
  if (ret != SUCCESS) {
    return ret;
  }

@@ -252,33 +336,64 @@ Status DynamicSingleOp::ValidateParams(const vector<GeTensorDesc> &input_desc,
  return SUCCESS;
}

Status DynamicSingleOp::SetHostTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
                                           const vector<GeTensorDesc> &input_desc,
                                           const std::vector<DataBuffer> &input_buffers) {
  auto op_desc = op_task_->GetOpdesc();
  GE_CHECK_NOTNULL(op_desc);
  GELOGD("Start to update input tensor values of %s.", op_desc->GetName().c_str());
  for (const auto &input_size : inputs_size) {
    size_t index = input_size.first;
    auto ge_tensor_desc = input_desc.at(index);
    // Reconstruct a GeTensor from the DataBuffer.
    GeTensorPtr ge_tensor = MakeShared<GeTensor>(ge_tensor_desc);
    GE_CHECK_NOTNULL(ge_tensor);
    GELOGD("The %zu tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.",
           index, ge_tensor_desc.GetDataType(), input_buffers[index].data, input_buffers[index].length);
    if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(input_buffers[index].data),
                           static_cast<size_t>(input_buffers[index].length)) != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "[Set][Data] Failed to set data of ge tensor.");
      return INTERNAL_ERROR;
    }
    auto tensor_desc = op_desc->MutableInputDesc(index);
    GE_CHECK_NOTNULL(tensor_desc);
    if (!AttrUtils::SetTensor(tensor_desc, ATTR_NAME_VALUE, ge_tensor)) {
      GELOGE(FAILED, "[Set][ATTR_NAME_VALUE] Failed to set ATTR_NAME_VALUE to %s.", op_desc->GetName().c_str());
      return FAILED;
    }
  }
  return SUCCESS;
}

Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
                                     const vector<DataBuffer> &input_buffers,
                                     vector<GeTensorDesc> &output_desc,
                                     vector<DataBuffer> &output_buffers) {
  GELOGD("Start DynamicSingleOp::ExecuteAsync.");
  GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers));
  vector<pair<size_t, uint64_t>> inputs_size;
  GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(input_buffers, inputs_size));
  vector<DataBuffer> update_buffers = input_buffers;
  std::lock_guard<std::mutex> lk(*stream_mutex_);
  if (!inputs_size.empty()) {
    StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_);
    GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource, stream_, inputs_size, update_buffers));
  }

  if (hybrid_model_executor_ != nullptr) {
    GELOGD("Execute multi-task dynamic single op by hybrid model executor");
    hybrid::HybridModelExecutor::ExecuteArgs args;
    for (auto &input : input_buffers) {
      args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length));
    }
    for (auto &output : output_buffers) {
      args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length));
    }
    for (auto &tensor_desc : input_desc) {
      auto desc = MakeShared<GeTensorDesc>(tensor_desc);
      GE_CHECK_NOTNULL(desc);
      args.input_desc.emplace_back(desc);
    }
    GE_CHK_STATUS_RET_NOLOG(InitHybridModelArgs(update_buffers, output_buffers, input_desc, args));
    return hybrid_model_executor_->Execute(args);
  }

  std::lock_guard<std::mutex> lk(*stream_mutex_);
  GE_CHECK_NOTNULL(op_task_);
  GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_));
  if (!inputs_size.empty()) {
    GE_CHK_STATUS_RET_NOLOG(SetHostTensorValue(inputs_size, input_desc, input_buffers));
    GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, update_buffers, output_desc, output_buffers, stream_));
  } else {
    GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_));
  }
  GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_));
  GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic));
  return SUCCESS;
@@ -59,6 +59,9 @@ class SingleOp {
  std::vector<OpTask *> tasks_;
  std::vector<std::vector<uintptr_t *>> arg_table_;
  std::unique_ptr<SingleOpModelParam> running_param_;
  std::unique_ptr<hybrid::HybridModel> hybrid_model_;
  std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_;
  std::vector<GeTensorDesc> inputs_desc_;
};

class DynamicSingleOp {

@@ -76,7 +79,8 @@ class DynamicSingleOp {
                        const std::vector<DataBuffer> &inputs,
                        std::vector<GeTensorDesc> &output_desc,
                        std::vector<DataBuffer> &outputs) const;
  Status SetHostTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
                            const vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers);

  std::unique_ptr<OpTask> op_task_;
  std::unique_ptr<hybrid::HybridModel> hybrid_model_;
  std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_;

@@ -85,6 +89,7 @@ class DynamicSingleOp {
  rtStream_t stream_ = nullptr;
  size_t num_inputs_ = 0;
  size_t num_outputs_ = 0;
  ComputeGraphPtr compute_graph_;
};
}  // namespace ge
#endif  // GE_SINGLE_OP_SINGLE_OP_H_

@@ -43,6 +43,8 @@ using std::vector;
namespace ge {
namespace {
const size_t kDataOutputNum = 1;
const uint32_t kOutputIndexOfData = 0;
constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";

Status IfInferDepend(GeModelPtr &ge_model, bool &flag) {
  auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());

@@ -51,7 +53,9 @@ Status IfInferDepend(GeModelPtr &ge_model, bool &flag) {
    auto op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    const auto &depends = op_desc->GetOpInferDepends();
    if (!depends.empty()) {
    bool support_dynamic_shape = false;
    (void)AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, support_dynamic_shape);
    if (!depends.empty() && support_dynamic_shape) {
      flag = true;
      return SUCCESS;
    }

@@ -462,6 +466,31 @@ Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTa
  *task = aicpucc_task.release();
  return SUCCESS;
}

Status SingleOpModel::InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model,
                                              SingleOp &single_op) {
  for (const auto &op_desc : data_ops_) {
    auto output_tensor_desc = op_desc->GetOutputDesc(kOutputIndexOfData);
    GeTensorDesc tensor_desc(output_tensor_desc);
    single_op.inputs_desc_.emplace_back(tensor_desc);
    GELOGD("Init inputs desc from %s.", op_desc->GetName().c_str());
  }
  GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized());
  auto root_model = model_helper_.GetGeRootModel();
  GE_CHECK_NOTNULL(root_model);
  root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph()));
  root_model->SetSubgraphInstanceNameToModel(root_model->GetRootGraph()->GetName(), ge_model);
  single_op.hybrid_model_.reset(new (std::nothrow) hybrid::HybridModel(root_model));
  GE_CHECK_NOTNULL(single_op.hybrid_model_);
  GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "[Init][HybridModel] Failed.");
  int32_t device_id = 0;
  GE_CHK_RT_RET(rtGetDevice(&device_id));
  single_op.hybrid_model_executor_.reset(new (std::nothrow) hybrid::HybridModelExecutor(single_op.hybrid_model_.get(),
                                                                                        device_id,
                                                                                        resource.GetStream()));
  GE_CHECK_NOTNULL(single_op.hybrid_model_executor_);
  GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor] Failed.");
  return SUCCESS;
}

Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
  GE_CHK_STATUS_RET_NOLOG(ParseInputsAndOutputs());

@@ -469,10 +498,20 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
  single_op.running_param_.reset(new (std::nothrow) SingleOpModelParam(model_params_));
  GE_CHECK_NOTNULL(single_op.running_param_);
  GE_CHK_STATUS_RET_NOLOG(SetInputsAndOutputs(single_op));

  auto ge_model = model_helper_.GetGeModel();
  GE_CHECK_NOTNULL(ge_model);
  bool infer_depend_flag = false;
  GE_CHK_STATUS_RET(IfInferDepend(ge_model, infer_depend_flag), "[Check][InferDepend] failed.");
  if (infer_depend_flag) {
    // This op has infer-depend inputs, so execute it with HybridModelExecutor.
    GELOGD("Init hybrid model params of single op, and will do execute with hybrid model executor.");
    return InitHybridModelExecutor(resource, ge_model, single_op);
  }
  return BuildTaskList(&resource, single_op);
}

Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) {
Status SingleOpModel::BuildModelTaskKernel(StreamResource *stream_resource, const TaskDef &task_def,
                                           DynamicSingleOp &single_op) {
  auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
  const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
                                                            task_def.kernel_with_handle().context();

@@ -483,6 +522,10 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
    TbeOpTask *tbe_task = nullptr;
    GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task));
    tbe_task->SetModelArgs(model_name_, model_id_);
    if (tbe_task->tiling_buffer_ != nullptr) {
      GELOGD("Tiling buffer is not nullptr.");
      tbe_task->stream_resource_ = stream_resource;
    }
    single_op.op_task_.reset(tbe_task);
  } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
    GELOGD("Building AICPU_CC task");

@@ -504,10 +547,13 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
  return SUCCESS;
}

Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &single_op) {
  auto ge_model = model_helper_.GetGeModel();
  GE_CHECK_NOTNULL(ge_model);

  auto compute_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
  GE_CHECK_NOTNULL(compute_graph);
  single_op.compute_graph_ = compute_graph;
  auto tasks = ge_model->GetModelTaskDefPtr()->task();
  for (int i = 0; i < tasks.size(); ++i) {
    const TaskDef &task_def = tasks[i];

@@ -521,7 +567,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
               "BuildTaskListForDynamicOp fail for Do not support dynamic op with multiple tasks.");
        return ACL_ERROR_GE_OP_TASK_TYPE_INVALID;
      }
      GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op));
      GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(stream_resource, task_def, single_op));
    } else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
      if (single_op.op_task_ != nullptr) {
        GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "[Check][TaskType] Do not support dynamic op with multiple tasks.");

@@ -561,6 +607,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &
  single_op.num_outputs_ = netoutput_op_->GetAllInputsSize();
  GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource));
  model_params_.memory_size = UINT_MAX;
  model_params_.graph_is_dynamic = true;
  auto ge_model = model_helper_.GetGeModel();
  GE_CHECK_NOTNULL(ge_model);

@@ -585,6 +632,6 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &
    GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor] Failed.");
    return SUCCESS;
  }
  return BuildTaskListForDynamicOp(single_op);
  return BuildTaskListForDynamicOp(&resource, single_op);
}
}  // namespace ge
@@ -40,6 +40,7 @@ struct SingleOpModelParam {
  std::map<uintptr_t, int> addr_mapping_;
  int64_t core_type = 0;
  bool graph_is_dynamic = false;
};

class SingleOpModel {

@@ -65,15 +66,17 @@ class SingleOpModel {
  void ParseOutputNode(const OpDescPtr &op_desc);

  Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op);
  Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op);
  Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op);
  Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task);
  Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task,
                           bool dynamic_flag, bool &depend_compute_flag, uint64_t kernel_id);
  Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id);
  Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op);
  Status BuildModelTaskKernel(StreamResource *stream_resource, const domi::TaskDef &task_def,
                              DynamicSingleOp &single_op);

  static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam &param);
  void ParseArgTable(OpTask *task, SingleOp &op);
  Status InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, SingleOp &single_op);

  std::string model_name_;
  uint32_t model_id_ = 0;

@@ -22,6 +22,11 @@
#include "single_op/single_op_model.h"

namespace ge {
namespace {
// Limit the shared device buffer to 1 MB.
const uint32_t kFuzzDeviceBufferSize = 1 * 1024 * 1024;
}

StreamResource::StreamResource(uintptr_t resource_id) : resource_id_(resource_id) {
}

@@ -39,6 +44,17 @@ StreamResource::~StreamResource() {
    GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed."));
  }
}

  if (device_buffer_ != nullptr) {
    auto rt_ret = rtFree(device_buffer_);
    GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed."));
  }
}

Status StreamResource::Init() {
  // Reuse the buffer on repeated calls; Init() runs on every execution that has host inputs.
  if (device_buffer_ != nullptr) {
    return SUCCESS;
  }
  auto rt_ret = rtMalloc(&device_buffer_, kFuzzDeviceBufferSize, RT_MEMORY_HBM);
  GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Malloc][Rt] failed."); return RT_FAILED);
  return SUCCESS;
}

SingleOp *StreamResource::GetOperator(const uint64_t key) {

@@ -40,6 +40,7 @@ class StreamResource {
  rtStream_t GetStream() const;
  void SetStream(rtStream_t stream);
  Status Init();

  SingleOp *GetOperator(const uint64_t key);
  DynamicSingleOp *GetDynamicOperator(const uint64_t key);

@@ -49,6 +50,9 @@ class StreamResource {
  uint8_t *MallocMemory(const std::string &purpose, size_t size, bool holding_lock = true);
  uint8_t *MallocWeight(const std::string &purpose, size_t size);
  const uint8_t *GetMemoryBase() const;
  void *GetDeviceBufferAddr() const {
    return device_buffer_;
  }

 private:
  uint8_t *DoMallocMemory(const std::string &purpose,

@@ -65,6 +69,7 @@ class StreamResource {
  rtStream_t stream_ = nullptr;
  std::mutex mu_;
  std::mutex stream_mu_;
  void *device_buffer_ = nullptr;
};
}  // namespace ge

@@ -137,7 +137,7 @@ Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id
  return SUCCESS;
}

Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) {
Status OpTask::UpdateRunInfo() {
  return UNSUPPORTED;
}

@@ -200,14 +200,14 @@ void TbeOpTask::SetHandle(void *handle) {
Status TbeOpTask::LaunchKernel(rtStream_t stream) {
  GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_);
  auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_);
  auto ret = rtKernelLaunch(stub_func_, block_dim_, args_.get(), static_cast<uint32_t>(arg_size_), sm_desc, stream);
  auto ret = DoLaunchKernel(stream);
  int retry_times = 0;
  while (ret != RT_ERROR_NONE && retry_times < kLaunchRetryTimes) {
    retry_times++;
    GELOGW("Retry after %d ms, retry_times: %d", kSleepTime, retry_times);
    std::this_thread::sleep_for(std::chrono::milliseconds(kSleepTime));
    ret = rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, sm_desc, stream);
    ret = DoLaunchKernel(stream);
  }

  if (ret != RT_ERROR_NONE) {

@@ -220,8 +220,7 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) {
  return SUCCESS;
}

Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) {
  GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc));
Status TbeOpTask::UpdateRunInfo() {
  // Invoke OpParaCalculate.
  GELOGD("Start to invoke OpParaCalculate.");
  optiling::OpRunInfo run_info;

@@ -235,10 +234,9 @@ Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const ve
  block_dim_ = run_info.block_dim;
  tiling_data_ = run_info.tiling_data.str();
  tiling_key_ = run_info.tiling_key;
  run_info_workspaces_ = run_info.workspaces;
  GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_,
         tiling_data_.size(), tiling_key_);
  GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "[Allocate][Workspaces] failed.");
  return SUCCESS;
}

@@ -288,14 +286,33 @@ Status TbeOpTask::UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, cons
  return SUCCESS;
}

void TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size) {
Status TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size) {
  if (tiling_buffer != nullptr) {
    uintptr_t *arg_base = nullptr;
    size_t arg_num = 0;
    GetIoAddr(arg_base, arg_num);
    GE_CHECK_NOTNULL(node);
    GE_CHECK_NOTNULL(node->GetOpDesc());
    uint32_t inputs_num = node->GetOpDesc()->GetInputsSize();
    uint32_t outputs_num = node->GetOpDesc()->GetOutputsSize();
    uint32_t workspace_nums = node->GetOpDesc()->GetWorkspace().size();
    uint32_t tiling_index = inputs_num + outputs_num + workspace_nums;
    if (arg_num == 0 || arg_num <= tiling_index) {
      GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Size] Tiling index %u, arg number %zu is invalid.",
             tiling_index, arg_num);
      return ACL_ERROR_GE_INTERNAL_ERROR;
    }
    arg_base[tiling_index] = reinterpret_cast<uintptr_t>(tiling_buffer);
  }
  node_ = node;
  tiling_buffer_ = tiling_buffer;
  max_tiling_size_ = max_tiling_size;
  return SUCCESS;
}
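
The tiling pointer lands right after all I/O and workspace slots in the kernel arg table, which is why arg_num must exceed tiling_index before arg_base[tiling_index] can be patched. A worked sketch of the index math, assuming a hypothetical node with 2 inputs, 1 output, and 1 workspace:

    // Sketch: arg layout is [inputs..., outputs..., workspaces..., tiling].
    uint32_t inputs_num = 2;
    uint32_t outputs_num = 1;
    uint32_t workspace_num = 1;
    uint32_t tiling_index = inputs_num + outputs_num + workspace_num;  // == 4
    // arg_base must hold at least tiling_index + 1 == 5 slots, so arg_num > 4;
    // the guard above rejects arg_num <= tiling_index before writing the slot.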
Status TbeOpTask::AllocateWorkspaces(const vector<int64_t> &workspace_sizes) {
  static const std::string kPurpose("malloc workspace memory for dynamic op.");
  workspaces_.clear();
  if (workspace_sizes.empty()) {
    GELOGD("No need to allocate workspace.");
    return SUCCESS;

@@ -333,8 +350,10 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
                               vector<GeTensorDesc> &output_desc,
                               vector<DataBuffer> &output_buffers,
                               rtStream_t stream) {
  GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo(input_desc, output_desc));
  GELOGD("[%s] Start to launch kernel", node_->GetName().c_str());
  GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc));
  GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo());
  GE_CHK_STATUS_RET(AllocateWorkspaces(run_info_workspaces_), "[Allocate][Workspaces] failed.");
  std::vector<void *> args;
  for (auto &buffer : input_buffers) {
    args.emplace_back(buffer.data);

@@ -354,6 +373,15 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
    args.emplace_back(tiling_buffer_);
  }

  GELOGD("Dst size is %zu, src size is %zu.", arg_size_, args.size() * sizeof(void *));
  // For nodes with workspaces, the build step cannot know the workspace sizes,
  // so args_ may need to grow to hold all pointers at execute time.
  if (arg_size_ < (args.size() * sizeof(void *))) {
    size_t temp_size = args.size() * sizeof(void *);
    GELOGD("Need to reset size of args_ from %zu to %zu.", arg_size_, temp_size);
    args_.reset(new (std::nothrow) uint8_t[temp_size]());
    GE_CHECK_NOTNULL(args_);
    arg_size_ = temp_size;
  }
  if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) {
    GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][KernelArgs] failed for [%s].", node_->GetName().c_str());
    REPORT_INNER_ERROR("E19999", "update kernel args failed for %s.", node_->GetName().c_str());

@@ -361,17 +389,22 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
  }

  GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str());
  GE_CHK_STATUS_RET(DoLaunchKernel(stream), "Failed to do launch kernel.");

  return SUCCESS;
}

Status TbeOpTask::DoLaunchKernel(rtStream_t stream) {
  auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_);
  if (handle_ == nullptr) {
    GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream));
    GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str());
    GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), static_cast<uint32_t>(arg_size_),
                                 sm_desc, stream));
  } else {
    std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_);
    std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_);
    GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr,
                                           stream, kernel_info.c_str()));
    GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str());
    GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(),
                                           static_cast<uint32_t>(arg_size_), sm_desc, stream, kernel_info.c_str()));
  }
  return SUCCESS;
}

@@ -30,6 +30,7 @@
#include "cce/aicpu_engine_struct.h"
#include "hybrid/node_executor/aicpu/aicpu_ext_info.h"
#include "init/gelib.h"
#include "register/op_tiling.h"

namespace ge {
class StreamResource;

@@ -39,8 +40,7 @@ class OpTask {
  OpTask() = default;
  virtual ~OpTask() = default;
  virtual Status LaunchKernel(rtStream_t stream) = 0;
  virtual Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc,
                               const vector<GeTensorDesc> &output_desc);
  virtual Status UpdateRunInfo();
  virtual Status UpdateArgTable(const SingleOpModelParam &param);
  void SetModelArgs(std::string model_name, uint32_t model_id);
  Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id);

@@ -81,22 +81,23 @@ class TbeOpTask : public OpTask {
  void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim,
                               const OpDescPtr &op_desc, const domi::KernelDefWithHandle &kernel_def_with_handle);

  Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc,
                       const vector<GeTensorDesc> &output_desc) override;
  Status UpdateRunInfo() override;

  const void *GetArgs() const;
  size_t GetArgSize() const;
  const std::string &GetStubName() const;
  void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size);
  Status EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size);
  const std::string &GetTaskType() const override;
  void SetHandle(void *handle);

 private:
  friend class SingleOpModel;
  friend class TbeTaskBuilder;
  static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor);
  Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc,
                           const vector<GeTensorDesc> &output_desc);
  Status AllocateWorkspaces(const std::vector<int64_t> &workspace_sizes);
  Status DoLaunchKernel(rtStream_t stream);

  const void *stub_func_ = nullptr;
  std::unique_ptr<uint8_t[]> args_;

@@ -108,6 +109,7 @@ class TbeOpTask : public OpTask {
  void *tiling_buffer_ = nullptr;
  uint32_t max_tiling_size_ = 0;
  std::string tiling_data_;
  std::vector<int64_t> run_info_workspaces_;
  std::vector<void *> workspaces_;
  NodePtr node_;

@@ -308,92 +308,65 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam &param
}

Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &param, const OpDescPtr &op_desc) {
  size_t arg_size = kernel_def_.args_size();
  auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
  GE_CHECK_NOTNULL(args);
  auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy failed, size = %zu, ret = %d",
           arg_size, static_cast<int>(rt_ret));
    REPORT_INNER_ERROR("E19999", "rtMemcpy failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret));
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  auto task_type = static_cast<rtModelTaskType_t>(task_def_.type());
  bool is_task_all_kernel = (task_type == RT_MODEL_TASK_ALL_KERNEL);
  size_t arg_size = 0;
  std::unique_ptr<uint8_t[]> args = nullptr;
  if (is_task_all_kernel) {
    GELOGD("SetKernelArgs of %s in branch of RT_MODEL_TASK_ALL_KERNEL.", op_desc->GetName().c_str());
    arg_size = kernel_def_with_handle_.args_size();
    args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
    GE_CHECK_NOTNULL(args);
    GE_CHK_RT_RET(rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size,
                           RT_MEMCPY_HOST_TO_HOST))
  } else {
    GELOGD("SetKernelArgs of %s in branch of RT_MODEL_TASK_KERNEL.", op_desc->GetName().c_str());
    arg_size = kernel_def_.args_size();
    args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
    GE_CHECK_NOTNULL(args);
    GE_CHK_RT_RET(rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST))
} | |||
const domi::KernelContext &context = kernel_def_.context(); | |||
const domi::KernelContext &context = is_task_all_kernel ? | |||
kernel_def_with_handle_.context() : kernel_def_.context(); | |||
const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | |||
uint16_t offset = *args_offset_tmp; | |||
bool is_dynamic = false; | |||
(void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic); | |||
if (is_dynamic) { | |||
GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task)); | |||
} else { | |||
// copy args | |||
std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | |||
void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | |||
uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | |||
rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret)); | |||
REPORT_INNER_ERROR("E19999", "rtMemcpy failed, ret = %d", static_cast<int>(rt_ret)); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
} | |||
task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc); | |||
// copy args | |||
std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | |||
void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | |||
uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | |||
GE_CHK_RT_RET(rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST)); | |||
return SUCCESS; | |||
} | |||
Status TbeTaskBuilder::SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, | |||
const OpDescPtr &op_desc) { | |||
size_t arg_size = kernel_def_with_handle_.args_size(); | |||
auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | |||
GE_CHECK_NOTNULL(args); | |||
auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "[Update][Kernel_def:args]rtMemcpy failed, size = %zu, ret = %d", | |||
arg_size, static_cast<int>(rt_ret)); | |||
REPORT_INNER_ERROR("E19999", "rtMemcpy failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret)); | |||
return rt_ret; | |||
if (is_task_all_kernel) { | |||
task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc, | |||
kernel_def_with_handle_); | |||
} else { | |||
task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc); | |||
} | |||
const domi::KernelContext &context = kernel_def_with_handle_.context(); | |||
const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | |||
uint16_t offset = *args_offset_tmp; | |||
bool is_dynamic = false; | |||
(void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic); | |||
if (is_dynamic) { | |||
GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task)); | |||
} else { | |||
// copy args | |||
std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | |||
void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | |||
uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | |||
rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret)); | |||
REPORT_INNER_ERROR("E19999", "rtMemcpy failed, ret = %d", static_cast<int>(rt_ret)); | |||
return rt_ret; | |||
if (!param.graph_is_dynamic && task.tiling_buffer_ != nullptr) { | |||
GELOGD("Need to update run info when graph is static with dynamic node: %s.", op_desc->GetName().c_str()); | |||
task.UpdateRunInfo(); | |||
GE_CHK_RT_RET(rtMemcpy(task.tiling_buffer_, task.max_tiling_size_, task.tiling_data_.data(), | |||
task.tiling_data_.size(), RT_MEMCPY_HOST_TO_DEVICE)); | |||
} | |||
} | |||
task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc, | |||
kernel_def_with_handle_); | |||
return SUCCESS; | |||
} | |||
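The merged SetKernelArgs replaces two near-duplicate functions with a single flow: select the kernel definition by task type once, then share the argument copy, offset lookup, and task setup. A reduced skeleton of that shape (types and names illustrative, not the GE definitions):

#include <cstdint>
#include <string>

enum class TaskKind { kKernel, kAllKernel };

struct KernelDefLike {
  std::string args;         // serialized argument block
  std::string args_offset;  // packed uint16_t offset table
  uint32_t block_dim = 1;
};

// One selection point; everything downstream is shared.
static const KernelDefLike &SelectKernelDef(TaskKind kind, const KernelDefLike &plain,
                                            const KernelDefLike &with_handle) {
  return kind == TaskKind::kAllKernel ? with_handle : plain;
}

The offset is then read exactly as in the diff, via reinterpret_cast<const uint16_t *>(context.args_offset().data()).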
Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m) { | |||
GELOGD("Build tbe task begin"); | |||
auto task_type = static_cast<rtModelTaskType_t>(task_def_.type()); | |||
auto ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? SetKernelWithHandleArgs(task, param, op_desc_) : | |||
SetKernelArgs(task, param, op_desc_); | |||
auto ret = SetKernelArgs(task, param, op_desc_); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
auto task_type = static_cast<rtModelTaskType_t>(task_def_.type()); | |||
ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? RegisterKernelWithHandle(task, param) : | |||
RegisterKernel(task, param); | |||
task.SetHandle(handle_); | |||
@@ -437,7 +410,7 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { | |||
GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); | |||
} | |||
task.EnableDynamicSupport(node_, tiling_buffer, static_cast<size_t>(max_size)); | |||
task.EnableDynamicSupport(node_, tiling_buffer, static_cast<uint32_t>(max_size)); | |||
return SUCCESS; | |||
} | |||
} // namespace ge |
@@ -97,7 +97,6 @@ class TbeTaskBuilder { | |||
private: | |||
Status InitTilingInfo(TbeOpTask &task); | |||
Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | |||
Status SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | |||
Status GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const; | |||
Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam ¶m); | |||
@@ -65,10 +65,12 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||
/// @param [in] inputs: input tensors. | |||
/// @param [in] outputs: output tensors. | |||
/// @param [in] model_file_name: name of model file. | |||
/// @param [in] compile_flag: op build flag: 0 for accurate build, 1 for fuzz build | |||
/// @return SUCCESS or FAILED | |||
/// | |||
Status BuildSingleOpModel(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, | |||
const std::vector<GeTensor> &outputs, const std::string &model_file_name); | |||
const std::vector<GeTensor> &outputs, const std::string &model_file_name, | |||
int32_t compile_flag = 0); | |||
/// | |||
/// @ingroup ge | |||
/// @brief: Build single Op into model buff. | |||
@@ -100,7 +102,7 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||
ge::ModelBufferData &model, bool is_offline = true); | |||
Status BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | |||
const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | |||
bool is_offline = true); | |||
bool is_offline = true, int32_t compile_flag = 0); | |||
bool CheckNoAicore(const ComputeGraphPtr &graph); | |||
void RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs); | |||
Status CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs); | |||
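For callers, fuzz compilation is opt-in via the new trailing parameter; the default of 0 preserves the existing accurate-build behavior. A minimal usage sketch modeled on the unit test further below (include path and output file name are assumptions):

#include <memory>
#include <vector>
#include "generator/ge_generator.h"  // path assumed

// Build a two-input Add as a single-op model with fuzz build enabled.
ge::Status BuildAddWithFuzz() {
  ge::GeTensorDesc tensor_desc;
  auto op_desc = std::make_shared<ge::OpDesc>("Add", "Add");
  op_desc->AddInputDesc(tensor_desc);
  op_desc->AddInputDesc(tensor_desc);
  op_desc->AddOutputDesc(tensor_desc);
  ge::GeTensor tensor(tensor_desc);
  std::vector<ge::GeTensor> inputs{tensor, tensor};
  std::vector<ge::GeTensor> outputs{tensor};
  ge::GeGenerator generator;
  (void)generator.Initialize({});
  return generator.BuildSingleOpModel(op_desc, inputs, outputs, "add_fuzz.om",
                                      /*compile_flag=*/1);
}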
@@ -123,6 +123,7 @@ struct OmgContext { | |||
bool need_multi_batch = false; | |||
std::vector<NodePtr> data_nodes; | |||
std::vector<NodePtr> getnext_nosink_nodes; | |||
bool fuzz_compile_flag = false; | |||
}; | |||
} // namespace ge | |||
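The new OmgContext field is the switch the marking pass and its tests key off (see GetLocalOmgContext().fuzz_compile_flag in the tests below). A pass would typically early-out when it is unset; hypothetical body, not the actual pass source:

Status MarkNodeUnknownShapePass::Run(ComputeGraphPtr graph) {
  if (!GetLocalOmgContext().fuzz_compile_flag) {
    return SUCCESS;  // nothing to mark outside fuzz compilation
  }
  // ... per-node shape marking elided ...
  return SUCCESS;
}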
@@ -278,6 +278,7 @@ set(COMMON_SRC_FILES | |||
"${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" | |||
"${GE_CODE_DIR}/ge/graph/passes/parallel_group_pass.cc" | |||
"${GE_CODE_DIR}/ge/graph/passes/buffer_pool_memory_pass.cc" | |||
"${GE_CODE_DIR}/ge/graph/passes/mark_node_unknown_shape_pass.cc" | |||
"${GE_CODE_DIR}/ge/model/ge_model.cc" | |||
"${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" | |||
"${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" | |||
@@ -708,6 +709,8 @@ set(PASS_TEST_FILES | |||
"graph/passes/transpose_transdata_pass_unittest.cc" | |||
"graph/passes/parallel_group_pass_unittest.cc" | |||
"graph/passes/buffer_pool_memory_pass_unittest.cc" | |||
"graph/passes/mark_node_unknown_shape_pass_unittest.cc" | |||
"graph/passes/reshape_recovery_pass_unittest.cc" | |||
) | |||
set(KERNEL_TEST_FILES | |||
@@ -799,6 +802,7 @@ set(SINGLE_OP_TEST_FILES | |||
"single_op/single_op_manager_unittest.cc" | |||
"single_op/stream_resource_unittest.cc" | |||
"single_op/single_op_task_unittest.cc" | |||
"single_op/single_op_unittest.cc" | |||
) | |||
set(PROFILING_MNG_TEST_FILES | |||
@@ -45,6 +45,15 @@ ComputeGraphPtr MakeGraph() { | |||
builder.AddDataEdge(data, 0, addn1, 0); | |||
return builder.GetGraph(); | |||
} | |||
static GeAttrValue::NamedAttrs CreateNamedAttrs(const string &name, const std::map<string, GeAttrValue> &attr_map) { | |||
GeAttrValue::NamedAttrs named_attrs; | |||
named_attrs.SetName(name); | |||
for (const auto &it : attr_map) { | |||
named_attrs.SetAttr(it.first, it.second); | |||
} | |||
return named_attrs; | |||
} | |||
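The helper packs a name and an attribute map into a single NamedAttrs. A hypothetical call, e.g. assembling one fuzz-build result entry (keys and values illustrative only):

std::map<string, GeAttrValue> attr_map;
attr_map["block_dim"] = GeAttrValue::CreateFrom<int64_t>(32);  // illustrative key/value
GeAttrValue::NamedAttrs attrs = CreateNamedAttrs("fuzz_build_res", attr_map);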
} // namespace | |||
/* | |||
@@ -85,25 +94,7 @@ TEST_F(UtestGeGenerator, test_build_single_op_online) { | |||
GeGenerator generator; | |||
generator.Initialize({}); | |||
ModelBufferData model_buffer; | |||
EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, model_buffer), FAILED); | |||
} | |||
TEST_F(UtestGeGenerator, test_singleop_fuzz_build) { | |||
GeTensorDesc tensor_desc; | |||
shared_ptr<OpDesc> op_desc = make_shared<OpDesc>("Add", "add"); | |||
op_desc->AddInputDesc(tensor_desc); | |||
op_desc->AddInputDesc(tensor_desc); | |||
op_desc->AddOutputDesc(tensor_desc); | |||
GeTensor tensor(tensor_desc); | |||
const vector<GeTensor> inputs = { tensor, tensor }; | |||
const vector<GeTensor> outputs = { tensor }; | |||
GeGenerator generator; | |||
generator.Initialize({}); | |||
ModelBufferData model_buffer; | |||
bool compile_flag = true; | |||
EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, compile_flag, model_buffer), SUCCESS); | |||
EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, false, model_buffer), FAILED); | |||
} | |||
TEST_F(UtestGeGenerator, test_check_aicore) { | |||
@@ -0,0 +1,115 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include <gtest/gtest.h> | |||
#include <cstdint> | |||
#include <memory> | |||
#include <string> | |||
#define private public | |||
#include "graph/passes/mark_node_unknown_shape_pass.h" | |||
#include "common/ge_inner_error_codes.h" | |||
#include "inc/pass_manager.h" | |||
#include "graph/common/local_context.h" | |||
#undef private | |||
namespace ge { | |||
class UtestMarkNodeUnknownShapePass : public testing::Test { | |||
protected: | |||
void SetUp() {} | |||
void TearDown() {} | |||
public: | |||
NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { | |||
GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||
auto op_desc = std::make_shared<OpDesc>(name, type); | |||
for (uint32_t i = 0; i < in_num; ++i) { | |||
op_desc->AddInputDesc(test_desc); | |||
} | |||
for (uint32_t i = 0; i < out_num; ++i) { | |||
op_desc->AddOutputDesc(test_desc); | |||
} | |||
return graph->AddNode(op_desc); | |||
} | |||
/// netoutput1 | |||
///     | | |||
///   conv1 | |||
///    \ / | |||
///   data | |||
void make_graph(const ComputeGraphPtr &graph) { | |||
GetLocalOmgContext().fuzz_compile_flag = true; | |||
auto conv2d_node = MakeNode(graph, 2, 1, "conv1", "Conv2D"); | |||
{ | |||
auto data1 = MakeNode(graph, 1, 1, "data", "Data"); | |||
GeTensorDesc tensor_desc(GeShape({1,3,224,224}), FORMAT_NCHW, DT_FLOAT); | |||
data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); | |||
data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); | |||
GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); | |||
} | |||
conv2d_node->GetOpDesc()->SetOpKernelLibName("AIcoreEngine"); | |||
AttrUtils::SetBool(conv2d_node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS, true); | |||
auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput"); | |||
GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); | |||
} | |||
}; | |||
TEST_F(UtestMarkNodeUnknownShapePass, test_run_with_GE_kernel) { | |||
OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||
ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||
op_desc->SetOpKernelLibName("GE"); | |||
graph->AddNode(op_desc); | |||
PassManager pass; | |||
pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); | |||
EXPECT_EQ(pass.Run(graph), SUCCESS); | |||
} | |||
TEST_F(UtestMarkNodeUnknownShapePass, test_run_without_fuzz_attrs) { | |||
OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||
ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||
op_desc->SetOpKernelLibName("AIcoreEngine"); | |||
graph->AddNode(op_desc); | |||
GetLocalOmgContext().fuzz_compile_flag = true; | |||
PassManager pass; | |||
pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); | |||
EXPECT_EQ(pass.Run(graph), SUCCESS); | |||
} | |||
TEST_F(UtestMarkNodeUnknownShapePass, test_run_with_fuzz_attrs) { | |||
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph"); | |||
make_graph(graph); | |||
PassManager pass; | |||
pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); | |||
EXPECT_EQ(pass.Run(graph), SUCCESS); | |||
EXPECT_EQ(graph->GetAllNodes().size(), 3); | |||
for (const auto &node : graph->GetAllNodes()) { | |||
if (node->GetName() == "conv1") { | |||
auto op_desc = node->GetOpDesc(); | |||
EXPECT_NE(op_desc, nullptr); | |||
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||
auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i)); | |||
ASSERT_NE(input_desc, nullptr); | |||
EXPECT_EQ(input_desc->GetShape().GetDim(0), -2); | |||
} | |||
for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) { | |||
EXPECT_NE(output_desc, nullptr); | |||
EXPECT_EQ(output_desc->GetShape().GetDim(0), -2); | |||
} | |||
} | |||
} | |||
} | |||
} // namespace ge |
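Read together, these expectations pin down the pass's observable behavior: with fuzz compilation on, AI Core nodes carrying fuzz-build attributes get every input and output shape rewritten to the unknown-rank sentinel -2. A reconstruction of that core loop inferred from the test, not the actual pass source:

// Inferred marking step (reconstruction): rewrite shapes of matching nodes
// to unknown rank ({-2}) so downstream treats them as dynamic.
Status MarkMatchingNodes(const ComputeGraphPtr &graph) {
  for (const auto &node : graph->GetAllNodes()) {
    auto op_desc = node->GetOpDesc();
    if (op_desc == nullptr || op_desc->GetOpKernelLibName() != "AIcoreEngine" ||
        !op_desc->HasAttr(ATTR_NAME_FUZZ_BUILD_RES_ATTRS)) {
      continue;
    }
    for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
      auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
      if (input_desc != nullptr) {
        input_desc->SetShape(GeShape({-2}));
      }
    }
    for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) {
      output_desc->SetShape(GeShape({-2}));
    }
  }
  return SUCCESS;
}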
@@ -0,0 +1,69 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "graph/passes/reshape_recovery_pass.h" | |||
#include <gtest/gtest.h> | |||
#include <set> | |||
#include <string> | |||
#include "graph_builder_utils.h" | |||
namespace ge { | |||
class UtestReshapeRecoveryPass : public testing::Test { | |||
protected: | |||
void SetUp() {} | |||
void TearDown() {} | |||
}; | |||
namespace { | |||
///      netoutput1 | |||
///       |      \ | |||
///  transdata1   \ | |||
///       |        \ | |||
///       |     transdata2 | |||
///       |        / | |||
///     var1    const1 | |||
ut::GraphBuilder Graph1Builder() { | |||
ut::GraphBuilder builder = ut::GraphBuilder("g2"); | |||
auto var1 = builder.AddNode("var1", "Variable", 0, 1, FORMAT_ND, DT_FLOAT, {-1}); | |||
auto const1 = builder.AddNode("const1", "Const", 0, 1, FORMAT_ND, DT_FLOAT, {1, 1, 224, 224}); | |||
auto transdata2 = builder.AddNode("transdata2", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {224, 224}); | |||
auto transdata1 = builder.AddNode("transdata1", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {224, 224}); | |||
auto netoutput1 = builder.AddNode("netoutput1", "Netoutput", 2, 0); | |||
builder.AddDataEdge(var1, 0, transdata1, 0); | |||
builder.AddDataEdge(const1, 0, transdata2, 0); | |||
builder.AddDataEdge(transdata2, 0, netoutput1, 1); | |||
builder.AddDataEdge(transdata1, 0, netoutput1, 0); | |||
return builder; | |||
} | |||
} // namespace | |||
TEST_F(UtestReshapeRecoveryPass, reshape_recovery_with_dynamic_shape) { | |||
auto builder = Graph1Builder(); | |||
auto graph = builder.GetGraph(); | |||
ReshapeRecoveryPass reshape_recovery_pass; | |||
EXPECT_EQ(graph->GetDirectNodesSize(), 5); | |||
Status ret = reshape_recovery_pass.Run(graph); | |||
EXPECT_EQ(ret, SUCCESS); | |||
EXPECT_EQ(graph->GetDirectNodesSize(), 8); | |||
auto reshape1 = graph->FindNode("Reshape_ReshapeRecoveryPass_0"); | |||
EXPECT_NE(reshape1, nullptr); | |||
} | |||
} // namespace ge |
@@ -0,0 +1,163 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include <gtest/gtest.h> | |||
#include <vector> | |||
#include "runtime/rt.h" | |||
#define protected public | |||
#define private public | |||
#include "single_op/single_op.h" | |||
#include "single_op/single_op_manager.h" | |||
#undef private | |||
#undef protected | |||
using namespace std; | |||
using namespace ge; | |||
class UtestSingleOp : public testing::Test { | |||
protected: | |||
void SetUp() {} | |||
void TearDown() {} | |||
}; | |||
TEST_F(UtestSingleOp, test_dynamic_singleop_execute_async) { | |||
uintptr_t resource_id = 0; | |||
std::mutex stream_mu; | |||
rtStream_t stream = nullptr; | |||
rtStreamCreate(&stream, 0); | |||
DynamicSingleOp dynamic_single_op(resource_id, &stream_mu, stream); | |||
vector<int64_t> dims_vec_0 = {2}; | |||
vector<GeTensorDesc> input_desc; | |||
GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32); | |||
// input data from device | |||
AttrUtils::SetInt(tensor_desc_0, ATTR_NAME_PLACEMENT, 0); | |||
input_desc.emplace_back(tensor_desc_0); | |||
vector<DataBuffer> input_buffers; | |||
ge::DataBuffer data_buffer; | |||
data_buffer.data = new char[4]; | |||
data_buffer.length = 4; | |||
input_buffers.emplace_back(data_buffer); | |||
vector<GeTensorDesc> output_desc; | |||
vector<DataBuffer> output_buffers; | |||
// UpdateRunInfo failed | |||
EXPECT_EQ(dynamic_single_op.ExecuteAsync(input_desc, input_buffers, output_desc, output_buffers), ACL_ERROR_GE_PARAM_INVALID); | |||
} | |||
TEST_F(UtestSingleOp, test_dynamic_singleop_execute_async1) { | |||
uintptr_t resource_id = 0; | |||
std::mutex stream_mu; | |||
rtStream_t stream = nullptr; | |||
rtStreamCreate(&stream, 0); | |||
DynamicSingleOp dynamic_single_op(resource_id, &stream_mu, stream); | |||
dynamic_single_op.num_inputs_ = 1; | |||
vector<int64_t> dims_vec_0 = {2}; | |||
vector<GeTensorDesc> input_desc; | |||
GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32); | |||
// input data from host | |||
AttrUtils::SetInt(tensor_desc_0, ATTR_NAME_PLACEMENT, 1); | |||
input_desc.emplace_back(tensor_desc_0); | |||
int64_t input_size = 0; | |||
EXPECT_EQ(TensorUtils::GetTensorMemorySizeInBytes(tensor_desc_0, input_size), SUCCESS); | |||
EXPECT_EQ(input_size, 64); | |||
EXPECT_NE(SingleOpManager::GetInstance().GetResource(resource_id, stream), nullptr); | |||
vector<DataBuffer> input_buffers; | |||
ge::DataBuffer data_buffer; | |||
data_buffer.data = new char[4]; | |||
data_buffer.length = 4; | |||
input_buffers.emplace_back(data_buffer); | |||
vector<GeTensorDesc> output_desc; | |||
vector<DataBuffer> output_buffers; | |||
auto *tbe_task = new (std::nothrow) TbeOpTask(); | |||
ge::OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL); | |||
ge::ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default"); | |||
ge::NodePtr node = graph->AddNode(op_desc); | |||
tbe_task->node_ = node; | |||
dynamic_single_op.op_task_.reset(tbe_task);  // TbeOpTask derives from OpTask; no cast needed | |||
OpDescPtr desc_ptr = MakeShared<OpDesc>("name1", "type1"); | |||
EXPECT_EQ(desc_ptr->AddInputDesc("x", GeTensorDesc(GeShape({2}), FORMAT_NCHW)), GRAPH_SUCCESS); | |||
dynamic_single_op.op_task_->op_desc_ = desc_ptr; | |||
// UpdateRunInfo failed | |||
EXPECT_EQ(dynamic_single_op.ExecuteAsync(input_desc, input_buffers, output_desc, output_buffers), PARAM_INVALID); | |||
} | |||
TEST_F(UtestSingleOp, test_singleop_execute_async1) { | |||
StreamResource *res = new (std::nothrow) StreamResource(1); | |||
std::mutex stream_mu; | |||
rtStream_t stream = nullptr; | |||
rtStreamCreate(&stream, 0); | |||
SingleOp single_op(res, &stream_mu, stream); | |||
vector<DataBuffer> input_buffers; | |||
ge::DataBuffer data_buffer; | |||
data_buffer.data = new char[4]; | |||
data_buffer.length = 4; | |||
data_buffer.placement = 1; | |||
input_buffers.emplace_back(data_buffer); | |||
vector<DataBuffer> output_buffers; | |||
single_op.input_sizes_.emplace_back(4); | |||
SingleOpModelParam model_params; | |||
single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params)); | |||
single_op.args_.resize(1); | |||
EXPECT_EQ(single_op.hybrid_model_executor_, nullptr); | |||
EXPECT_EQ(single_op.running_param_->mem_base, nullptr); | |||
EXPECT_EQ(single_op.tasks_.size(), 0); | |||
EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), SUCCESS); | |||
} | |||
TEST_F(UtestSingleOp, test_singleop_execute_async2) { | |||
StreamResource *res = new (std::nothrow) StreamResource(1); | |||
std::mutex stream_mu; | |||
rtStream_t stream = nullptr; | |||
rtStreamCreate(&stream, 0); | |||
SingleOp single_op(res, &stream_mu, stream); | |||
vector<DataBuffer> input_buffers; | |||
ge::DataBuffer data_buffer; | |||
data_buffer.data = new char[4]; | |||
data_buffer.length = 4; | |||
data_buffer.placement = 1; | |||
input_buffers.emplace_back(data_buffer); | |||
vector<DataBuffer> output_buffers; | |||
single_op.input_sizes_.emplace_back(4); | |||
SingleOpModelParam model_params; | |||
single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params)); | |||
single_op.args_.resize(1); | |||
GeTensorDesc tensor_desc(GeShape({1}), FORMAT_NHWC, DT_UINT64); | |||
single_op.inputs_desc_.emplace_back(tensor_desc); | |||
std::shared_ptr<ge::GeRootModel> root_model = ge::MakeShared<ge::GeRootModel>(); | |||
single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model)); | |||
single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(), 0, stream)); | |||
EXPECT_EQ(single_op.running_param_->mem_base, nullptr); | |||
EXPECT_EQ(single_op.tasks_.size(), 0); | |||
EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), PARAM_INVALID); | |||
} |
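One convention worth noting in these tests: DataBuffer.placement distinguishes host memory (1) from device memory (0), matching the ATTR_NAME_PLACEMENT values set on the tensor descs. A small helper in that spirit, hypothetical test-style code only:

// Wrap a caller-owned host buffer the way the tests do (placement = 1).
ge::DataBuffer MakeHostBuffer(void *data, uint64_t length) {
  ge::DataBuffer buffer;
  buffer.data = data;
  buffer.length = length;
  buffer.placement = 1;  // 1: host memory, 0: device memory
  return buffer;
}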