Browse Source

!1507 Adaptation run package 0412

From: @shenwei41
Reviewed-by: @lilongfei15,@liucunwei
Signed-off-by: @lilongfei15,@liucunwei
tags/v1.2.0
mindspore-ci-bot Gitee 4 years ago
parent
commit
c144b4bb9e
45 changed files with 626 additions and 190 deletions
  1. +1
    -1
      ge/common/dump/dump_manager.cc
  2. +36
    -7
      ge/common/dump/dump_op.cc
  3. +3
    -1
      ge/common/dump/dump_op.h
  4. +8
    -7
      ge/common/dump/dump_properties.cc
  5. +11
    -0
      ge/common/tbe_kernel_store.cc
  6. +42
    -7
      ge/generator/ge_generator.cc
  7. +0
    -55
      ge/graph/build/graph_builder.cc
  8. +46
    -0
      ge/graph/build/model_builder.cc
  9. +2
    -0
      ge/graph/build/model_builder.h
  10. +15
    -8
      ge/graph/load/model_manager/davinci_model.cc
  11. +12
    -1
      ge/graph/load/model_manager/davinci_model.h
  12. +4
    -4
      ge/graph/load/model_manager/model_manager.cc
  13. +1
    -4
      ge/graph/load/model_manager/task_info/end_graph_task_info.cc
  14. +1
    -2
      ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc
  15. +2
    -6
      ge/graph/load/model_manager/task_info/kernel_task_info.cc
  16. +33
    -0
      ge/graph/passes/atomic_addr_clean_pass.cc
  17. +8
    -0
      ge/graph/passes/atomic_addr_clean_pass.h
  18. +2
    -1
      ge/graph/preprocess/insert_op/ge_aipp_op.cc
  19. +8
    -4
      ge/graph/preprocess/insert_op/util_insert_aipp_op.cc
  20. +1
    -1
      ge/hybrid/executor/hybrid_execution_context.h
  21. +0
    -4
      ge/hybrid/executor/hybrid_model_async_executor.cc
  22. +0
    -3
      ge/hybrid/executor/hybrid_model_async_executor.h
  23. +15
    -7
      ge/hybrid/executor/hybrid_model_executor.cc
  24. +10
    -0
      ge/hybrid/executor/hybrid_model_pipeline_executor.cc
  25. +14
    -10
      ge/hybrid/executor/worker/execution_engine.cc
  26. +4
    -5
      ge/hybrid/hybrid_davinci_model.cc
  27. +1
    -1
      ge/hybrid/hybrid_davinci_model.h
  28. +1
    -1
      ge/hybrid/hybrid_davinci_model_stub.cc
  29. +20
    -0
      ge/hybrid/model/hybrid_model.cc
  30. +10
    -3
      ge/hybrid/model/hybrid_model.h
  31. +113
    -5
      ge/hybrid/model/hybrid_model_builder.cc
  32. +8
    -0
      ge/hybrid/model/hybrid_model_builder.h
  33. +43
    -14
      ge/hybrid/node_executor/aicore/aicore_op_task.cc
  34. +8
    -0
      ge/hybrid/node_executor/aicore/aicore_op_task.h
  35. +1
    -0
      ge/hybrid/node_executor/aicore/aicore_task_builder.cc
  36. +27
    -15
      ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
  37. +2
    -3
      ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h
  38. +3
    -3
      ge/single_op/single_op_model.cc
  39. +11
    -3
      ge/single_op/task/op_task.cc
  40. +1
    -0
      inc/framework/generator/ge_generator.h
  41. +1
    -1
      metadef
  42. +1
    -1
      parser
  43. +2
    -0
      tests/ut/ge/CMakeLists.txt
  44. +61
    -0
      tests/ut/ge/common/dump_op_unittest.cc
  45. +33
    -2
      tests/ut/ge/graph/passes/atomic_addr_clean_pass_unittest.cc

+ 1
- 1
ge/common/dump/dump_manager.cc View File

@@ -96,7 +96,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf
dump_mode = dump_config.dump_mode; dump_mode = dump_config.dump_mode;
GELOGI("Dump mode is %s", dump_mode.c_str()); GELOGI("Dump mode is %s", dump_mode.c_str());
dump_properties.SetDumpMode(dump_mode); dump_properties.SetDumpMode(dump_mode);
dump_properties_map_.emplace(kInferSessionId, dump_properties);
dump_properties_map_[kInferSessionId] = dump_properties;


return SUCCESS; return SUCCESS;
} }


+ 36
- 7
ge/common/dump/dump_op.cc View File

@@ -20,6 +20,7 @@
#include "common/ge/datatype_util.h" #include "common/ge/datatype_util.h"
#include "framework/common/debug/ge_log.h" #include "framework/common/debug/ge_log.h"
#include "framework/common/util.h" #include "framework/common/util.h"
#include "framework/common/types.h"
#include "graph/anchor.h" #include "graph/anchor.h"
#include "graph/ge_tensor.h" #include "graph/ge_tensor.h"
#include "graph/op_desc.h" #include "graph/op_desc.h"
@@ -55,8 +56,10 @@ void DumpOp::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond
loop_cond_ = reinterpret_cast<uintptr_t>(loop_cond); loop_cond_ = reinterpret_cast<uintptr_t>(loop_cond);
} }


void DumpOp::SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id) {
void DumpOp::SetDynamicModelInfo(const string &dynamic_model_name, const string &dynamic_om_name,
uint32_t dynamic_model_id) {
dynamic_model_name_ = dynamic_model_name; dynamic_model_name_ = dynamic_model_name;
dynamic_om_name_ = dynamic_om_name;
dynamic_model_id_ = dynamic_model_id; dynamic_model_id_ = dynamic_model_id;
} }


@@ -200,6 +203,32 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
return SUCCESS; return SUCCESS;
} }


Status DumpOp::SetDumpModelName(aicpu::dump::OpMappingInfo &op_mapping_info) {
if (dynamic_model_name_.empty() && dynamic_om_name_.empty()) {
GELOGI("Single op dump, no need set model name");
return SUCCESS;
}
std::set<std::string> model_list = dump_properties_.GetAllDumpModel();
bool not_find_by_omname = model_list.find(dynamic_om_name_) == model_list.end();
bool not_find_by_modelname = model_list.find(dynamic_model_name_) == model_list.end();
std::string dump_model_name = not_find_by_omname ? dynamic_model_name_ : dynamic_om_name_;
if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) {
if (not_find_by_omname && not_find_by_modelname) {
std::string model_list_str;
for (auto &model : model_list) {
model_list_str += "[" + model + "].";
}
GELOGW("Model %s will not be set to dump, dump list: %s", dump_model_name.c_str(), model_list_str.c_str());
return FAILED;
}
}
if (!dump_model_name.empty() && dump_properties_.IsDumpOpen()) {
GELOGD("Dump model name is %s", dump_model_name.c_str());
op_mapping_info.set_model_name(dump_model_name);
}
return SUCCESS;
}

Status DumpOp::LaunchDumpOp() { Status DumpOp::LaunchDumpOp() {
GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str()); GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str());
int32_t device_id = 0; int32_t device_id = 0;
@@ -209,8 +238,7 @@ Status DumpOp::LaunchDumpOp() {
return RT_ERROR_TO_GE_STATUS(rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);
} }
if (device_id < 0) { if (device_id < 0) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
"Check device_id failed, device_id = %d, which should be not less than 0.",
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Check device_id failed, device_id = %d, which should be not less than 0.",
device_id); device_id);
return ACL_ERROR_GE_INTERNAL_ERROR; return ACL_ERROR_GE_INTERNAL_ERROR;
} }
@@ -220,11 +248,12 @@ Status DumpOp::LaunchDumpOp() {
op_mapping_info.set_flag(kAicpuLoadFlag); op_mapping_info.set_flag(kAicpuLoadFlag);
op_mapping_info.set_dump_step(dump_properties_.GetDumpStep()); op_mapping_info.set_dump_step(dump_properties_.GetDumpStep());
op_mapping_info.set_model_id(dynamic_model_id_); op_mapping_info.set_model_id(dynamic_model_id_);
if (!dynamic_model_name_.empty() && dump_properties_.IsDumpOpen()) {
op_mapping_info.set_model_name(dynamic_model_name_);

if (SetDumpModelName(op_mapping_info) != SUCCESS) {
return SUCCESS;
} }
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(),
GELOGI("Dump step is %s ,dump path is %s in Launch dump op", dump_properties_.GetDumpStep().c_str(),
dump_path.c_str()); dump_path.c_str());
uint32_t task_id = 0; uint32_t task_id = 0;
uint32_t stream_id = 0; uint32_t stream_id = 0;
@@ -273,4 +302,4 @@ Status DumpOp::LaunchDumpOp() {
} }
return SUCCESS; return SUCCESS;
} }
} // namesapce ge
} // namespace ge

+ 3
- 1
ge/common/dump/dump_op.h View File

@@ -34,12 +34,13 @@ class DumpOp {
vector<uintptr_t> output_addrs, rtStream_t stream); vector<uintptr_t> output_addrs, rtStream_t stream);
Status LaunchDumpOp(); Status LaunchDumpOp();
void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond); void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond);
void SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id);
void SetDynamicModelInfo(const string &dynamic_model_name, const string &dynamic_om_name, uint32_t dynamic_model_id);


private: private:
Status ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info); Status ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info);
Status DumpOutput(aicpu::dump::Task &task); Status DumpOutput(aicpu::dump::Task &task);
Status DumpInput(aicpu::dump::Task &task); Status DumpInput(aicpu::dump::Task &task);
Status SetDumpModelName(aicpu::dump::OpMappingInfo &op_mapping_info);


DumpProperties dump_properties_; DumpProperties dump_properties_;
OpDescPtr op_desc_; OpDescPtr op_desc_;
@@ -54,6 +55,7 @@ class DumpOp {
uintptr_t loop_cond_; uintptr_t loop_cond_;


std::string dynamic_model_name_; std::string dynamic_model_name_;
std::string dynamic_om_name_;
std::uint32_t dynamic_model_id_; std::uint32_t dynamic_model_id_;
}; };
} // namespace ge } // namespace ge


+ 8
- 7
ge/common/dump/dump_properties.cc View File

@@ -35,14 +35,14 @@ const std::string kDumpStatusOpen = "on";
const uint32_t kAicoreOverflow = (0x1 << 0); const uint32_t kAicoreOverflow = (0x1 << 0);
const uint32_t kAtomicOverflow = (0x1 << 1); const uint32_t kAtomicOverflow = (0x1 << 1);
const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow); const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow);
}
} // namespace
namespace ge { namespace ge {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) {
CopyFrom(other); CopyFrom(other);
} }


FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties::operator=( FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties::operator=(
const DumpProperties &other) {
const DumpProperties &other) {
CopyFrom(other); CopyFrom(other);
return *this; return *this;
} }
@@ -97,7 +97,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti


// The following is the new dump scenario of the fusion operator // The following is the new dump scenario of the fusion operator
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::AddPropertyValue( FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::AddPropertyValue(
const std::string &model, const std::set<std::string> &layers) {
const std::string &model, const std::set<std::string> &layers) {
for (const std::string &layer : layers) { for (const std::string &layer : layers) {
GELOGI("This model %s config to dump layer %s", model.c_str(), layer.c_str()); GELOGI("This model %s config to dump layer %s", model.c_str(), layer.c_str());
} }
@@ -138,7 +138,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpPrope
} }


FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpProperties::GetPropertyValue( FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpProperties::GetPropertyValue(
const std::string &model) const {
const std::string &model) const {
auto iter = model_dump_properties_map_.find(model); auto iter = model_dump_properties_map_.find(model);
if (iter != model_dump_properties_map_.end()) { if (iter != model_dump_properties_map_.end()) {
return iter->second; return iter->second;
@@ -147,8 +147,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpPrope
} }


FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsLayerNeedDump( FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsLayerNeedDump(
const std::string &model, const std::string &om_name, const std::string &op_name) const {
const std::string &model, const std::string &om_name, const std::string &op_name) const {
// if dump all // if dump all
GELOGD("model name is %s om name is %s op is %s in layer need dump", model.c_str(), om_name.c_str(), op_name.c_str());
if (model_dump_properties_map_.find(DUMP_ALL_MODEL) != model_dump_properties_map_.end()) { if (model_dump_properties_map_.find(DUMP_ALL_MODEL) != model_dump_properties_map_.end()) {
return true; return true;
} }
@@ -203,7 +204,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperti
} }


FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch( FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch(
const std::string &dump_op_switch) {
const std::string &dump_op_switch) {
dump_op_switch_ = dump_op_switch; dump_op_switch_ = dump_op_switch;
} }


@@ -270,4 +271,4 @@ void DumpProperties::SetDumpDebugOptions() {
GELOGI("ge.exec.enableDumpDebug is false or is not set."); GELOGI("ge.exec.enableDumpDebug is false or is not set.");
} }
} }
} // namespace
} // namespace ge

+ 11
- 0
ge/common/tbe_kernel_store.cc View File

@@ -15,6 +15,8 @@
*/ */


#include "common/tbe_kernel_store.h" #include "common/tbe_kernel_store.h"
#include "graph/utils/attr_utils.h"
#include "graph/debug/ge_attr_define.h"


namespace ge { namespace ge {


@@ -31,6 +33,15 @@ void TBEKernelStore::LoadTBEKernelBinToOpDesc(const std::shared_ptr<ge::OpDesc>
GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, kernel_bin), GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, kernel_bin),
GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for kernel_bin failed");) GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for kernel_bin failed");)
GELOGI("Load tbe kernel:%s, %zu", kernel_bin->GetName().c_str(), kernel_bin->GetBinDataSize()); GELOGI("Load tbe kernel:%s, %zu", kernel_bin->GetName().c_str(), kernel_bin->GetBinDataSize());

std::string atomic_kernel_name;
(void) AttrUtils::GetStr(op_desc, ATOMIC_ATTR_TBE_KERNEL_NAME, atomic_kernel_name);
if (!atomic_kernel_name.empty()) {
GELOGI("Get atomic kernel name is %s.", atomic_kernel_name.c_str());
auto atomic_kernel_bin = FindKernel(atomic_kernel_name);
GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(EXT_ATTR_ATOMIC_TBE_KERNEL, atomic_kernel_bin),
GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for atomic kernel_bin failed");)
}
} }
} }
} }


+ 42
- 7
ge/generator/ge_generator.cc View File

@@ -67,6 +67,9 @@ bool ContainsDynamicInpus(const ge::OpDesc &op_desc) {
} }
return false; return false;
} }
bool IsOptional(const ge::GeTensorDesc &tensor_desc) {
return tensor_desc.GetFormat() == ge::FORMAT_RESERVED && tensor_desc.GetDataType() == ge::DT_UNDEFINED;
}
} // namespace } // namespace


namespace ge { namespace ge {
@@ -154,7 +157,7 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty
} }


static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index, static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index,
bool attr) {
bool attr, int32_t &data_index) {
GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID);
GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID);


@@ -197,9 +200,10 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const
"[Add][InputDesc]fail for node:%s", data_op->GetName().c_str()); "[Add][InputDesc]fail for node:%s", data_op->GetName().c_str());
GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED,
"[Add][OutputDesc]fail for node:%s", data_op->GetName().c_str()); "[Add][OutputDesc]fail for node:%s", data_op->GetName().c_str());
if (attr) {
GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED,
if (attr && !is_const) {
GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, data_index), return FAILED,
"[Set][Attr:%s]fail for node:%s", ATTR_NAME_INDEX.c_str(), data_op->GetName().c_str()); "[Set][Attr:%s]fail for node:%s", ATTR_NAME_INDEX.c_str(), data_op->GetName().c_str());
++data_index;
} }


ge::NodePtr arg_node = graph->AddNode(data_op); ge::NodePtr arg_node = graph->AddNode(data_op);
@@ -691,6 +695,34 @@ namespace {
} }
return SUCCESS; return SUCCESS;
} }

bool CheckNoAicore(const ComputeGraphPtr &graph) {
for (const auto &node : graph->GetDirectNode()) {
if (node == nullptr) {
continue;
}
auto op_desc = node->GetOpDesc();
if (op_desc == nullptr) {
continue;
}
if (op_desc->GetOpEngineName() == kAIcoreEngine) {
return false;
}
}
return true;
}
}

void GeGenerator::RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
for (auto &input : inputs) {
GeTensorDesc input_desc = input.GetTensorDesc();
bool is_const = false;
(void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const);
bool is_optional = IsOptional(input_desc);
if (!is_optional && !is_const) {
outputs.emplace_back(input);
}
}
} }


Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
@@ -757,7 +789,9 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
GELOGI("ATC parser success in single op build."); GELOGI("ATC parser success in single op build.");


GeRootModelPtr ge_root_model = nullptr; GeRootModelPtr ge_root_model = nullptr;
GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model));
vector<GeTensor> data_inputs;
RemoveConst(inputs, data_inputs);
GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, data_inputs, ge_root_model));
map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs(); map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs();
GE_CHECK_NOTNULL(ge_root_model); GE_CHECK_NOTNULL(ge_root_model);
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
@@ -773,7 +807,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in


bool all_shape = false; bool all_shape = false;
(void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape); (void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape);
if (all_shape) {
if (all_shape && CheckNoAicore(root_graph)) {
GELOGD("Get aicpu all_shape kernel!"); GELOGD("Get aicpu all_shape kernel!");
vector<GeTensor> inputs_dynamic; vector<GeTensor> inputs_dynamic;
vector<GeTensor> outputs_dynamic; vector<GeTensor> outputs_dynamic;
@@ -840,18 +874,19 @@ Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor


// 2. Create InputData node. // 2. Create InputData node.
int32_t arg_index = 0; int32_t arg_index = 0;
int32_t data_index = 0;
if (inputs.empty()) { if (inputs.empty()) {
for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) { for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) {
GE_CHECK_NOTNULL_EXEC(input_desc, return INTERNAL_ERROR); GE_CHECK_NOTNULL_EXEC(input_desc, return INTERNAL_ERROR);
if (!IsNeedConnectInputOpForSingleOp(*input_desc)) { if (!IsNeedConnectInputOpForSingleOp(*input_desc)) {
continue; continue;
} }
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false));
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false, data_index));
arg_index++; arg_index++;
} }
} else { } else {
for (const auto &in_desc : inputs) { for (const auto &in_desc : inputs) {
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true));
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true, data_index));
arg_index++; arg_index++;
} }
} }


+ 0
- 55
ge/graph/build/graph_builder.cc View File

@@ -382,58 +382,6 @@ Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPt
return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id); return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id);
} }


static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor,
const std::vector<InDataAnchorPtr> &in_anchors, const std::string &name) {
GE_CHECK_NOTNULL(out_anchor);
NodePtr in_node = out_anchor->GetOwnerNode();
GE_CHECK_NOTNULL(in_node);
OpDescBuilder op_desc_builder(name, MEMCPYASYNC);
OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0))
.AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0))
.Build();
(void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false);
if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}

static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) {
if (graph->GetGraphUnknownFlag()) {
GELOGI("Graph %s is unknown graph, ignore gen_task for constant.", graph->GetName().c_str());
return SUCCESS;
}
for (auto &node : graph->GetDirectNode()) {
// CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT
auto op_desc = node->GetOpDesc();
if (op_desc == nullptr) {
continue;
}
auto op_type = op_desc->GetType();
if (op_type == NETOUTPUT) {
for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) {
const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
NodePtr in_node = peer_out_anchor->GetOwnerNode();
GE_CHECK_NOTNULL(in_node);

std::string in_node_op_type = in_node->GetType();
if (in_node_op_type == CONSTANT) {
GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str());
std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy";
if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) {
GELOGE(FAILED, "Insert memcpy between %s and %s failed.",
in_node->GetName().c_str(), node->GetName().c_str());
return FAILED;
}
}
}
}
}
return SUCCESS;
}

Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) {
bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag(); bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag();
com_graph->SetGraphUnknownFlag(false); com_graph->SetGraphUnknownFlag(false);
@@ -516,9 +464,6 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
!sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) {
continue; continue;
} }

GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed.");

if (sub_graph->GetGraphUnknownFlag()) { if (sub_graph->GetGraphUnknownFlag()) {
// unknown shape build flow // unknown shape build flow
GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id),


+ 46
- 0
ge/graph/build/model_builder.cc View File

@@ -574,6 +574,50 @@ Status ModelBuilder::MergeWeights() {
return SUCCESS; return SUCCESS;
} }


Status ModelBuilder::SaveAtomicTBEKernel(const OpDescPtr &op_desc) {
ge::NodePtr atomic_clean_node = nullptr;
atomic_clean_node = op_desc->TryGetExtAttr("atomic_clean_node_ptr", atomic_clean_node);
if (atomic_clean_node == nullptr) {
return SUCCESS;
}

ge::OpDescPtr atomic_op_desc = atomic_clean_node->GetOpDesc();
GE_CHECK_NOTNULL(atomic_op_desc);
TBEKernelPtr tbe_kernel = atomic_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
if (tbe_kernel == nullptr) {
std::string kernel_name;
GeAttrValue::BYTES kernel_buffer;
(void) AttrUtils::GetStr(atomic_op_desc, ATTR_NAME_TBE_KERNEL_NAME, kernel_name);
(void) AttrUtils::GetBytes(atomic_op_desc, ATTR_NAME_TBE_KERNEL_BUFFER, kernel_buffer);
if (!kernel_name.empty() && (kernel_buffer.GetSize() > 0)) {
GE_CHECK_NOTNULL(kernel_buffer.GetData());
std::vector<char> data(kernel_buffer.GetData(), kernel_buffer.GetData() + kernel_buffer.GetSize());
tbe_kernel = MakeShared<OpKernelBin>(kernel_name, std::move(data));
GE_CHECK_NOTNULL(tbe_kernel);
}
}
if (tbe_kernel == nullptr) {
GELOGD("Atomic_clean_node doesn't have tbe_kernel.");
return SUCCESS;
}
tbe_kernel_store_.AddTBEKernel(tbe_kernel);
GELOGD("Atomic_clean_node tbe_kernel_name %s!", tbe_kernel->GetName().c_str());
(void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TBE_KERNEL_NAME, tbe_kernel->GetName());

std::string kernel_name;
(void) AttrUtils::GetStr(atomic_op_desc, atomic_op_desc->GetName() + "_kernelname", kernel_name);
(void) AttrUtils::SetStr(op_desc, op_desc->GetName() + "_atomic_kernelname", kernel_name);

std::string meta_data;
(void) AttrUtils::GetStr(atomic_op_desc, TVM_ATTR_NAME_METADATA, meta_data);
(void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TVM_METADATA, meta_data);

std::string json_string;
(void) AttrUtils::GetStr(atomic_op_desc, TVM_ATTR_NAME_MAGIC, json_string);
(void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TVM_MAGIC, json_string);
return SUCCESS;
}

Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
// Add weight // Add weight
ge_model.SetWeight(weight_buffer_); ge_model.SetWeight(weight_buffer_);
@@ -607,6 +651,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
} }
tbe_name_set.insert(tbe_kernel->GetName()); tbe_name_set.insert(tbe_kernel->GetName());
tbe_kernel_store_.AddTBEKernel(tbe_kernel); tbe_kernel_store_.AddTBEKernel(tbe_kernel);

GE_CHK_STATUS_RET(SaveAtomicTBEKernel(node_op_desc), "[Save][TBEKernel] save atomic tbekernel failed!");
} }


SetModelCheckAicpuAttr(model, aicpu_op_types, aicpu_tf_op_types); SetModelCheckAicpuAttr(model, aicpu_op_types, aicpu_tf_op_types);


+ 2
- 0
ge/graph/build/model_builder.h View File

@@ -89,6 +89,8 @@ class ModelBuilder {
void SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types, void SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types); std::set<std::string> &aicpu_tf_op_types);


Status SaveAtomicTBEKernel(const OpDescPtr &op_desc);

uint64_t session_id_; uint64_t session_id_;


map<int64_t, size_t> mem_type_to_mem_offset_; map<int64_t, size_t> mem_type_to_mem_offset_;


+ 15
- 8
ge/graph/load/model_manager/davinci_model.cc View File

@@ -3067,9 +3067,8 @@ Status DavinciModel::DistributeTask() {
task_def.kernel_ex().op_index()); task_def.kernel_ex().op_index());
OpDescPtr op = GetOpByIndex(op_index); OpDescPtr op = GetOpByIndex(op_index);
GE_CHECK_NOTNULL(op); GE_CHECK_NOTNULL(op);

if (reinterpret_cast<void *>(task->GetDumpArgs()) != nullptr) { if (reinterpret_cast<void *>(task->GetDumpArgs()) != nullptr) {
bool call_dump = GetDumpProperties().IsLayerNeedDump(name_, om_name_, op->GetName()) && task->CallSaveDumpInfo();
bool call_dump = OpNeedDump(op->GetName()) && task->CallSaveDumpInfo();
if (call_dump || is_op_debug_reg_) { if (call_dump || is_op_debug_reg_) {
SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs());
} }
@@ -3089,11 +3088,16 @@ Status DavinciModel::DistributeTask() {
return SUCCESS; return SUCCESS;
} }


void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) {
bool DavinciModel::ModelNeedDump() {
auto all_dump_model = GetDumpProperties().GetAllDumpModel(); auto all_dump_model = GetDumpProperties().GetAllDumpModel();
bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end();
bool findByModelName = all_dump_model.find(name_) != all_dump_model.end();
if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) {
bool ret = all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
all_dump_model.find(dump_model_name_) != all_dump_model.end() ||
all_dump_model.find(om_name_) != all_dump_model.end();
return ret;
}

void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) {
if (ModelNeedDump()) {
GELOGI("start save end_graph_info to dumper, task_id is %u, stream_id is %u", task_id, stream_id); GELOGI("start save end_graph_info to dumper, task_id is %u, stream_id is %u", task_id, stream_id);
data_dumper_.SaveEndGraphId(task_id, stream_id); data_dumper_.SaveEndGraphId(task_id, stream_id);
} }
@@ -3893,7 +3897,10 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id)
} }


void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name) { void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name) {
data_dumper_.SetModelName(name_);
if(dump_model_name_.empty()) {
dump_model_name_ = name_;
}
data_dumper_.SetModelName(dump_model_name_);
data_dumper_.SetModelId(model_id_); data_dumper_.SetModelId(model_id_);
data_dumper_.SetOmName(om_name_); data_dumper_.SetOmName(om_name_);
data_dumper_.SetComputeGraph(graph); data_dumper_.SetComputeGraph(graph);
@@ -4082,7 +4089,7 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) {
Status DavinciModel::InitL1DataDumperArgs() { Status DavinciModel::InitL1DataDumperArgs() {
auto all_dump_model = GetDumpProperties().GetAllDumpModel(); auto all_dump_model = GetDumpProperties().GetAllDumpModel();
bool find_by_om_name = all_dump_model.find(om_name_) != all_dump_model.end(); bool find_by_om_name = all_dump_model.find(om_name_) != all_dump_model.end();
bool find_by_model_name = all_dump_model.find(name_) != all_dump_model.end();
bool find_by_model_name = all_dump_model.find(dump_model_name_) != all_dump_model.end();
bool dump_l1fusion_op = bool dump_l1fusion_op =
(all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) || find_by_om_name || find_by_model_name; (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) || find_by_om_name || find_by_model_name;
if (dump_l1fusion_op) { if (dump_l1fusion_op) {


+ 12
- 1
ge/graph/load/model_manager/davinci_model.h View File

@@ -248,7 +248,10 @@ class DavinciModel {
string Name() const { return name_; } string Name() const { return name_; }


// om_name // om_name
string OmName() const { return om_name_; }
const string &OmName() const { return om_name_; }

// dump_model_name
const string &DumpModelName() const { return dump_model_name_; }


// version // version
uint32_t Version() const { return version_; } uint32_t Version() const { return version_; }
@@ -483,6 +486,12 @@ class DavinciModel {
data_dumper_.DumpShrink(); data_dumper_.DumpShrink();
} }


bool OpNeedDump(const string &op_name) {
return GetDumpProperties().IsLayerNeedDump(dump_model_name_, om_name_, op_name);
}

bool ModelNeedDump();

void SetEndGraphId(uint32_t task_id, uint32_t stream_id); void SetEndGraphId(uint32_t task_id, uint32_t stream_id);
DavinciModel &operator=(const DavinciModel &model) = delete; DavinciModel &operator=(const DavinciModel &model) = delete;


@@ -542,6 +551,7 @@ class DavinciModel {


// om file name // om file name
void SetOmName(const string &om_name) { om_name_ = om_name; } void SetOmName(const string &om_name) { om_name_ = om_name; }
void SetDumpModelName(const string &dump_model_name) { dump_model_name_ = dump_model_name; }


void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); } void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); }
const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); } const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); }
@@ -888,6 +898,7 @@ class DavinciModel {


// used for inference data dump // used for inference data dump
string om_name_; string om_name_;
string dump_model_name_;


uint32_t version_; uint32_t version_;
GeModelPtr ge_model_; // release after DavinciModel::Init GeModelPtr ge_model_; // release after DavinciModel::Init


+ 4
- 4
ge/graph/load/model_manager/model_manager.cc View File

@@ -271,7 +271,7 @@ ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector<uin
return SUCCESS; return SUCCESS;
} }


ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string &model_name,
ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string &om_name,
const shared_ptr<ge::GeRootModel> &ge_root_model, const shared_ptr<ge::GeRootModel> &ge_root_model,
const shared_ptr<ModelListener> &listener) { const shared_ptr<ModelListener> &listener) {
auto hybrid_model = hybrid::HybridDavinciModel::Create(ge_root_model); auto hybrid_model = hybrid::HybridDavinciModel::Create(ge_root_model);
@@ -279,7 +279,7 @@ ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string
hybrid_model->SetListener(listener); hybrid_model->SetListener(listener);
hybrid_model->SetModelId(model_id); hybrid_model->SetModelId(model_id);
hybrid_model->SetDeviceId(GetContext().DeviceId()); hybrid_model->SetDeviceId(GetContext().DeviceId());
hybrid_model->SetModelName(model_name);
hybrid_model->SetOmName(om_name);
GE_CHK_STATUS_RET(hybrid_model->Init(), "Failed to init hybrid model. model_id = %u", model_id); GE_CHK_STATUS_RET(hybrid_model->Init(), "Failed to init hybrid model. model_id = %u", model_id);
auto shared_model = std::shared_ptr<hybrid::HybridDavinciModel>(hybrid_model.release()); auto shared_model = std::shared_ptr<hybrid::HybridDavinciModel>(hybrid_model.release());
InsertModel(model_id, shared_model); InsertModel(model_id, shared_model);
@@ -309,9 +309,9 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
GenModelId(&model_id); GenModelId(&model_id);
} }
auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel(); auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel();
string model_name = "";
string om_name;
if (IsNeedHybridLoad(*ge_root_model)) { if (IsNeedHybridLoad(*ge_root_model)) {
return DoLoadHybridModelOnline(model_id, model_name, ge_root_model, listener);
return DoLoadHybridModelOnline(model_id, om_name, ge_root_model, listener);
} }


mmTimespec timespec = mmGetTickCount(); mmTimespec timespec = mmGetTickCount();


+ 1
- 4
ge/graph/load/model_manager/task_info/end_graph_task_info.cc View File

@@ -45,10 +45,7 @@ Status EndGraphTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
Status EndGraphTaskInfo::Distribute() { Status EndGraphTaskInfo::Distribute() {
GELOGI("EndGraphTaskInfo Distribute Start."); GELOGI("EndGraphTaskInfo Distribute Start.");
GE_CHECK_NOTNULL(davinci_model_); GE_CHECK_NOTNULL(davinci_model_);
auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel();
if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() ||
all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) {
if (davinci_model_->ModelNeedDump()) {
GELOGI("Start to call rtEndGraphEx"); GELOGI("Start to call rtEndGraphEx");
rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag); rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {


+ 1
- 2
ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc View File

@@ -238,8 +238,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
} }


void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) { void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) {
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc->GetName())) {
if (davinci_model_->OpNeedDump(op_desc->GetName())) {
dump_flag_ = RT_KERNEL_DUMPFLAG; dump_flag_ = RT_KERNEL_DUMPFLAG;
dump_args_ = addr; dump_args_ = addr;
} }


+ 2
- 6
ge/graph/load/model_manager/task_info/kernel_task_info.cc View File

@@ -409,10 +409,7 @@ Status KernelTaskInfo::Distribute() {
call_skt, task_id_, skt_id_, skt_info.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); call_skt, task_id_, skt_id_, skt_info.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_);
// l1 fusion enable and env flag open (kCloseSkt for skt debug) // l1 fusion enable and env flag open (kCloseSkt for skt debug)
bool open_dump = false; bool open_dump = false;
auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel();
if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() ||
all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) {
if (davinci_model_->ModelNeedDump()) {
open_dump = true; open_dump = true;
} }
if (call_skt && (env_flag != kCloseSkt) && !open_dump) { if (call_skt && (env_flag != kCloseSkt) && !open_dump) {
@@ -980,8 +977,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
} }


void KernelTaskInfo::InitDumpTask(uint32_t offset) { void KernelTaskInfo::InitDumpTask(uint32_t offset) {
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc_->GetName())) {
if (davinci_model_->OpNeedDump(op_desc_->GetName())) {
if (IsL1FusionOp(op_desc_)) { if (IsL1FusionOp(op_desc_)) {
dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG;
} else { } else {


+ 33
- 0
ge/graph/passes/atomic_addr_clean_pass.cc View File

@@ -222,6 +222,39 @@ Status AtomicAddrCleanPass::HandleNormalGraph(ComputeGraphPtr &graph, const vect
} }
} }
} }
return LinkToPotentialPrecedenceNode(graph, clean_addr_node);
}

// Add control edges from atomic clean node to all potential precedence nodes which may execute before atomic clean
// node. We hope that atomic clean node can execute with the highest priority in the entire graph. Because of stream
// concurrency mechanism, only placing it at the head can not ensure that priority. Therefore, we need to add control
// edges from atomic clean node to the nodes that may be the first node on each stream. Generally, the first nodes on
// each stream are successors of Data/Variable, and Data/Variable won't generate task or execute, so we link to the
// successors of Data/Variable.
Status AtomicAddrCleanPass::LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node) {
GELOGD("Start to add control edges from %s to all second-nodes behind first-nodes which have no input.",
atomic_clean_node->GetName().c_str());
auto out_ctrl_anchor = atomic_clean_node->GetOutControlAnchor();
GE_CHECK_NOTNULL(out_ctrl_anchor);

for (const auto &node : graph->GetDirectNode()) {
GE_CHECK_NOTNULL(node);
bool need_handle = (node->GetType() == DATA || node->GetType() == VARIABLE) && node->GetInAllNodes().empty();
if (!need_handle) {
continue;
}
auto second_nodes = node->GetOutAllNodes();
for (const auto &second_node : second_nodes) {
GE_CHECK_NOTNULL(second_node);
auto in_ctrl_anchor = second_node->GetInControlAnchor();
GE_CHECK_NOTNULL(in_ctrl_anchor);
if (!out_ctrl_anchor->IsLinkedWith(in_ctrl_anchor)) {
GE_CHK_STATUS_RET(out_ctrl_anchor->LinkTo(in_ctrl_anchor));
GELOGD("Add control edge from %s to %s.", atomic_clean_node->GetName().c_str(), second_node->GetName().c_str());
}
}
}

return SUCCESS; return SUCCESS;
} }




+ 8
- 0
ge/graph/passes/atomic_addr_clean_pass.h View File

@@ -68,6 +68,14 @@ class AtomicAddrCleanPass : public GraphPass {
Status LinkToAtomicNode(const NodePtr &atomic_node, NodePtr &atomic_clean_node); Status LinkToAtomicNode(const NodePtr &atomic_node, NodePtr &atomic_clean_node);


/** /**
* Link atomic clean node to all potential precedence nodes which may execute before atomic clean node
* @param graph
* @param atomic_clean_node
* @return
*/
Status LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node);

/**
* Check if this node is atomic op. * Check if this node is atomic op.
* @param node * @param node
* @return * @return


+ 2
- 1
ge/graph/preprocess/insert_op/ge_aipp_op.cc View File

@@ -428,7 +428,8 @@ Status AippOp::ConvertRelatedInputNameToRank() {
if (!convert_flag) { if (!convert_flag) {
string error_msg = "Top name " + related_input_name + "convert rank failed, Please" string error_msg = "Top name " + related_input_name + "convert rank failed, Please"
" ensure top name in aipp config is the top name of data node."; " ensure top name in aipp config is the top name of data node.";
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
return PARAM_INVALID; return PARAM_INVALID;
} }




+ 8
- 4
ge/graph/preprocess/insert_op/util_insert_aipp_op.cc View File

@@ -124,13 +124,15 @@ Status InsertNewOpUtil::CheckInputNamePositionNotRepeat() {
if (another_item->related_input_name().empty()) { if (another_item->related_input_name().empty()) {
string error_msg = "Can not both set related_input_name and related_input_rank!" string error_msg = "Can not both set related_input_name and related_input_rank!"
" Please ensure param is the same with the first aipp config(related_input_name)."; " Please ensure param is the same with the first aipp config(related_input_name).";
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
return PARAM_INVALID; return PARAM_INVALID;
} }
if (item->related_input_name() == another_item->related_input_name()) { if (item->related_input_name() == another_item->related_input_name()) {
string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_name" string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_name"
" param is different in different aipp config."; " param is different in different aipp config.";
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
return PARAM_INVALID; return PARAM_INVALID;
} }
} }
@@ -150,13 +152,15 @@ Status InsertNewOpUtil::CheckInputRankPositionNoRepeat() {
if (!another_item->related_input_name().empty()) { if (!another_item->related_input_name().empty()) {
string error_msg = "Can not both set related_input_rank and related_input_name!" string error_msg = "Can not both set related_input_rank and related_input_name!"
" Please ensure param is the same with the first aipp config(related_input_rank)."; " Please ensure param is the same with the first aipp config(related_input_rank).";
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
return PARAM_INVALID; return PARAM_INVALID;
} }
if (item->related_input_rank() == another_item->related_input_rank()) { if (item->related_input_rank() == another_item->related_input_rank()) {
string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_rank" string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_rank"
" param is different in different aipp config."; " param is different in different aipp config.";
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
return PARAM_INVALID; return PARAM_INVALID;
} }
} }


+ 1
- 1
ge/hybrid/executor/hybrid_execution_context.h View File

@@ -68,7 +68,7 @@ struct GraphExecutionContext {
DumpProperties dump_properties; DumpProperties dump_properties;
bool trace_enabled = false; bool trace_enabled = false;
bool dump_enabled = false; bool dump_enabled = false;
std::atomic_bool is_eos_;
std::atomic_bool is_eos_{false};
long profiling_level = 0; long profiling_level = 0;
long iteration = 0; long iteration = 0;
void *global_step = nullptr; void *global_step = nullptr;


+ 0
- 4
ge/hybrid/executor/hybrid_model_async_executor.cc View File

@@ -46,10 +46,6 @@ void HybridModelAsyncExecutor::SetModelId(uint32_t model_id) {
model_id_ = model_id; model_id_ = model_id;
} }


void HybridModelAsyncExecutor::SetModelName(const string &model_name) {
om_name_ = model_name;
}

Status HybridModelAsyncExecutor::EnqueueData(const shared_ptr<InputDataWrapper> &data) { Status HybridModelAsyncExecutor::EnqueueData(const shared_ptr<InputDataWrapper> &data) {
GE_CHK_STATUS_EXEC(data_inputer_->Push(data), return domi::DATA_QUEUE_ISFULL, GE_CHK_STATUS_EXEC(data_inputer_->Push(data), return domi::DATA_QUEUE_ISFULL,
"Data queue is full, please call again later, model_id %u ", model_id_); "Data queue is full, please call again later, model_id %u ", model_id_);


+ 0
- 3
ge/hybrid/executor/hybrid_model_async_executor.h View File

@@ -51,8 +51,6 @@ class HybridModelAsyncExecutor {


void SetModelId(uint32_t model_id); void SetModelId(uint32_t model_id);


void SetModelName(const string &model_name);

Status Stop(); Status Stop();


Status EnqueueData(const std::shared_ptr<InputDataWrapper> &data); Status EnqueueData(const std::shared_ptr<InputDataWrapper> &data);
@@ -97,7 +95,6 @@ class HybridModelAsyncExecutor {
std::map<uint32_t, GeTensorDescPtr> input_tensor_desc_; std::map<uint32_t, GeTensorDescPtr> input_tensor_desc_;
std::vector<bool> is_input_dynamic_; std::vector<bool> is_input_dynamic_;
std::shared_ptr<ModelListener> listener_; std::shared_ptr<ModelListener> listener_;
string om_name_;
DataDumper data_dumper_; DataDumper data_dumper_;
bool is_op_debug_reg_ = false; bool is_op_debug_reg_ = false;
OpdebugRegister op_debug_register_; OpdebugRegister op_debug_register_;


+ 15
- 7
ge/hybrid/executor/hybrid_model_executor.cc View File

@@ -33,9 +33,6 @@ HybridModelExecutor::~HybridModelExecutor() {
if (context_.rt_gen_context != nullptr) { if (context_.rt_gen_context != nullptr) {
(void) rtCtxDestroy(context_.rt_gen_context); (void) rtCtxDestroy(context_.rt_gen_context);
} }
if (context_.global_step != nullptr) {
(void) rtFree(context_.global_step);
}
} }


Status HybridModelExecutor::Init() { Status HybridModelExecutor::Init() {
@@ -49,9 +46,10 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
GELOGD("Start to execute model."); GELOGD("Start to execute model.");
auto root_graph_item = model_->GetRootGraphItem(); auto root_graph_item = model_->GetRootGraphItem();
GE_CHECK_NOTNULL(root_graph_item); GE_CHECK_NOTNULL(root_graph_item);

GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration,
sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream));
if (context_.global_step != nullptr) {
GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration,
sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream));
}
SubgraphExecutor executor(model_->GetRootGraphItem(), &context_); SubgraphExecutor executor(model_->GetRootGraphItem(), &context_);
auto ret = ExecuteGraphInternal(executor, args); auto ret = ExecuteGraphInternal(executor, args);
Cleanup(); Cleanup();
@@ -102,8 +100,8 @@ Status HybridModelExecutor::InitExecutionContext() {
GE_CHK_RT_RET(rtCtxGetCurrent(&context_.rt_context)); GE_CHK_RT_RET(rtCtxGetCurrent(&context_.rt_context));
GE_CHK_RT_RET(rtCtxCreate(&context_.rt_gen_context, RT_CTX_GEN_MODE, 0)); GE_CHK_RT_RET(rtCtxCreate(&context_.rt_gen_context, RT_CTX_GEN_MODE, 0));
GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context)); GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context));
GE_CHK_RT_RET(rtMalloc(&context_.global_step, sizeof(uint64_t), RT_MEMORY_HBM));


context_.global_step = model_->GetGlobalStep();
context_.stream = stream_; context_.stream = stream_;
context_.model = model_; context_.model = model_;
context_.is_eos_ = false; context_.is_eos_ = false;
@@ -136,6 +134,16 @@ Status HybridModelExecutor::ResetExecutionContext(GraphExecutionContext &context
string ctx_id = std::to_string(context.context_id); string ctx_id = std::to_string(context.context_id);
RuntimeInferenceContext::DestroyContext(ctx_id); RuntimeInferenceContext::DestroyContext(ctx_id);
GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext"); GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext");
RuntimeInferenceContext *ctx = nullptr;
GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context");
for (auto &host_tensor : context.model->GetHostTensors()) {
auto node_id = host_tensor.first;
for (const auto &output_idx_and_tensor : host_tensor.second) {
auto output_idx = output_idx_and_tensor.first;
GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx);
ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone());
}
}
return SUCCESS; return SUCCESS;
} }
} // namespace hybrid } // namespace hybrid


+ 10
- 0
ge/hybrid/executor/hybrid_model_pipeline_executor.cc View File

@@ -38,6 +38,16 @@ Status StageExecutor::ResetExecutionContext(GraphExecutionContext &context) {
string ctx_id = std::to_string(context.context_id); string ctx_id = std::to_string(context.context_id);
RuntimeInferenceContext::DestroyContext(ctx_id); RuntimeInferenceContext::DestroyContext(ctx_id);
GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext"); GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext");
RuntimeInferenceContext *ctx = nullptr;
GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context");
for (auto &host_tensor : context.model->GetHostTensors()) {
auto node_id = host_tensor.first;
for (const auto &output_idx_and_tensor : host_tensor.second) {
auto output_idx = output_idx_and_tensor.first;
GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx);
ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone());
}
}
return SUCCESS; return SUCCESS;
} }




+ 14
- 10
ge/hybrid/executor/worker/execution_engine.cc View File

@@ -206,31 +206,35 @@ Status NodeDoneCallback::DumpDynamicNode() {
return PARAM_INVALID; return PARAM_INVALID;
} }
auto op_desc = node->GetOpDesc(); auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(graph_context_);
const HybridModel *model = graph_context_->model;
GE_CHECK_NOTNULL(model);
std::string dynamic_model_name = model->GetModelName();
std::string dynamic_om_name = model->GetOmName();
uint32_t model_id = model->GetModelId();
if (!context_->GetDumpProperties().IsLayerNeedDump(dynamic_model_name, dynamic_om_name, op_desc->GetName())) {
GELOGI("[%s] is not in dump list, no need dump", op_desc->GetName().c_str());
return SUCCESS;
}
dump_op_.SetDynamicModelInfo(dynamic_model_name, dynamic_om_name, model_id);

auto stream = context_->GetStream(); auto stream = context_->GetStream();
vector<uintptr_t> input_addrs; vector<uintptr_t> input_addrs;
vector<uintptr_t> output_addrs; vector<uintptr_t> output_addrs;
for (int i = 0; i < context_->NumInputs(); i++) { for (int i = 0; i < context_->NumInputs(); i++) {
auto tensor_value = context_->GetInput(i); auto tensor_value = context_->GetInput(i);
GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr"); GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr");
uint64_t input_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
uintptr_t input_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
input_addrs.emplace_back(input_addr); input_addrs.emplace_back(input_addr);
} }
for (int j = 0; j < context_->NumOutputs(); j++) { for (int j = 0; j < context_->NumOutputs(); j++) {
auto tensor_value = context_->GetOutput(j); auto tensor_value = context_->GetOutput(j);
GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr"); GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr");
uint64_t output_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
uintptr_t output_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
output_addrs.emplace_back(output_addr); output_addrs.emplace_back(output_addr);
} }

dump_op_.SetDumpInfo(context_->GetDumpProperties(), op_desc, input_addrs, output_addrs, stream); dump_op_.SetDumpInfo(context_->GetDumpProperties(), op_desc, input_addrs, output_addrs, stream);


GE_CHECK_NOTNULL(graph_context_);
const HybridModel *model = graph_context_->model;
GE_CHECK_NOTNULL(model);
std::string dynamic_model_name = model->GetModelName();
uint32_t model_id = model->GetModelId();
dump_op_.SetDynamicModelInfo(dynamic_model_name, model_id);

void *loop_per_iter = nullptr; void *loop_per_iter = nullptr;
TensorValue *varible_loop_per_iter = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER); TensorValue *varible_loop_per_iter = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER);
if (varible_loop_per_iter != nullptr) { if (varible_loop_per_iter != nullptr) {


+ 4
- 5
ge/hybrid/hybrid_davinci_model.cc View File

@@ -76,9 +76,8 @@ class HybridDavinciModel::Impl {
executor_.SetDeviceId(device_id); executor_.SetDeviceId(device_id);
} }


void SetModelName(const string &model_name) {
model_.SetModelName(model_name);
executor_.SetModelName(model_name);
void SetOmName(const string &model_name) {
model_.SetOmName(model_name);
} }


uint64_t GetSessionId() { uint64_t GetSessionId() {
@@ -181,9 +180,9 @@ void HybridDavinciModel::SetDeviceId(uint32_t device_id) {
} }
} }


void HybridDavinciModel::SetModelName(const string &model_name) {
void HybridDavinciModel::SetOmName(const string &om_name) {
if (impl_ != nullptr) { if (impl_ != nullptr) {
impl_->SetModelName(model_name);
impl_->SetOmName(om_name);
} }
} }




+ 1
- 1
ge/hybrid/hybrid_davinci_model.h View File

@@ -57,7 +57,7 @@ class HybridDavinciModel {


void SetDeviceId(uint32_t device_id); void SetDeviceId(uint32_t device_id);


void SetModelName(const string &model_name);
void SetOmName(const string &om_name);


uint64_t GetSessionId(); uint64_t GetSessionId();




+ 1
- 1
ge/hybrid/hybrid_davinci_model_stub.cc View File

@@ -61,7 +61,7 @@ void HybridDavinciModel::SetModelId(uint32_t model_id) {
void HybridDavinciModel::SetDeviceId(uint32_t device_id) { void HybridDavinciModel::SetDeviceId(uint32_t device_id) {
} }


void HybridDavinciModel::SetModelName(const string &model_name) {
void HybridDavinciModel::SetOmName(const string &om_name) {
} }


uint64_t HybridDavinciModel::GetSessionId() { uint64_t HybridDavinciModel::GetSessionId() {


+ 20
- 0
ge/hybrid/model/hybrid_model.cc View File

@@ -357,5 +357,25 @@ TensorValue *HybridModel::GetTensor(const NodePtr &node) const {


return GetVariable(node->GetName()); return GetVariable(node->GetName());
} }

const map<int64_t, std::vector<std::pair<int, Tensor>>> &HybridModel::GetHostTensors() const {
return host_tensors_;
}

void *HybridModel::GetGlobalStep() const {
if (global_step_ == nullptr) {
return nullptr;
}
return global_step_->GetData();
}

TensorBuffer *HybridModel::GetModelWeight(const string &subgraph_name) const {
auto it = weight_buffer_map_.find(subgraph_name);
if (it == weight_buffer_map_.end()) {
GELOGD("Model weight not found, subgraph name = %s", subgraph_name.c_str());
return nullptr;
}
return it->second.get();
}
} // namespace hybrid } // namespace hybrid
} // namespace ge } // namespace ge

+ 10
- 3
ge/hybrid/model/hybrid_model.h View File

@@ -45,6 +45,8 @@ class HybridModel {
return root_runtime_param_.session_id; return root_runtime_param_.session_id;
} }


void *GetGlobalStep() const;

GeModelPtr GetGeModel(const NodePtr &node) const; GeModelPtr GetGeModel(const NodePtr &node) const;


NodeItem *MutableNodeItem(const NodePtr &node); NodeItem *MutableNodeItem(const NodePtr &node);
@@ -69,8 +71,8 @@ class HybridModel {
model_id_ = model_id; model_id_ = model_id;
} }


void SetModelName(const string &model_name) {
om_name_ = model_name;
void SetOmName(const string &om_name) {
om_name_ = om_name;
} }


const std::string &GetOmName() const { const std::string &GetOmName() const {
@@ -91,6 +93,10 @@ class HybridModel {


TensorValue* GetTensor(const NodePtr &node) const; TensorValue* GetTensor(const NodePtr &node) const;


TensorBuffer* GetModelWeight(const std::string &subgraph_name) const;

const std::map<int64_t, std::vector<std::pair<int, Tensor>>> &GetHostTensors() const;

const std::vector<domi::TaskDef>* GetTaskDefs(const NodePtr &node) const; const std::vector<domi::TaskDef>* GetTaskDefs(const NodePtr &node) const;


const GraphItem *GetRootGraphItem() const; const GraphItem *GetRootGraphItem() const;
@@ -146,6 +152,7 @@ class HybridModel {
std::unique_ptr<GraphItem> root_graph_item_; std::unique_ptr<GraphItem> root_graph_item_;
std::map<std::string, std::unique_ptr<GraphItem>> subgraph_items_; std::map<std::string, std::unique_ptr<GraphItem>> subgraph_items_;
std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_; std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_;
std::map<int64_t, std::vector<std::pair<int, Tensor>>> host_tensors_;


bool is_new_model_desc_ = false; // support aipp bool is_new_model_desc_ = false; // support aipp
bool is_single_op_ = false; bool is_single_op_ = false;
@@ -154,10 +161,10 @@ class HybridModel {
uint32_t device_id_ = 0; uint32_t device_id_ = 0;
uint32_t model_id_ = 0; uint32_t model_id_ = 0;
uint8_t *var_mem_base_ = nullptr; uint8_t *var_mem_base_ = nullptr;
std::unique_ptr<TensorBuffer> weight_buffer_;
std::map<string, std::unique_ptr<TensorBuffer>> weight_buffer_map_; std::map<string, std::unique_ptr<TensorBuffer>> weight_buffer_map_;
RuntimeParam root_runtime_param_; RuntimeParam root_runtime_param_;
string om_name_; string om_name_;
std::unique_ptr<TensorBuffer> global_step_;
}; };
} // namespace hybrid } // namespace hybrid
} // namespace ge } // namespace ge


+ 113
- 5
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -145,6 +145,9 @@ Status HybridModelBuilder::Build() {
GE_CHK_STATUS_RET(InitConstantOps(), "[%s] Failed to init constant op", GetGraphName()); GE_CHK_STATUS_RET(InitConstantOps(), "[%s] Failed to init constant op", GetGraphName());
GE_CHK_STATUS_RET(InitVariableTensors(), "[%s] Failed to init variables", GetGraphName()); GE_CHK_STATUS_RET(InitVariableTensors(), "[%s] Failed to init variables", GetGraphName());
GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName()); GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName());
GE_CHK_STATUS_RET(OptimizeDependenciesForConstantInputs(),
"[%s] Failed to optimize dependencies for constant inputs",
GetGraphName());
GELOGI("[%s] Done building hybrid model successfully.", GetGraphName()); GELOGI("[%s] Done building hybrid model successfully.", GetGraphName());
return SUCCESS; return SUCCESS;
} }
@@ -315,6 +318,18 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s
} }
} }


for (const auto &src_node : ge_node->GetInControlNodes()) {
auto src_node_item = MutableNodeItem(src_node);
if ((src_node_item != nullptr) && (is_hccl_op || src_node_item->IsHcclOp())) {
GELOGD("[%s](%s) Add input control dependent node [%s](%s)",
ge_node->GetName().c_str(),
ge_node->GetType().c_str(),
src_node->GetName().c_str(),
src_node->GetType().c_str());
dependent_for_execution.emplace(src_node);
}
}

// cond or branch need to be prepared before the execution of IF or CASE // cond or branch need to be prepared before the execution of IF or CASE
if (node_item.node_type == IF || node_item.node_type == STATELESSIF || node_item.node_type == CASE) { if (node_item.node_type == IF || node_item.node_type == STATELESSIF || node_item.node_type == CASE) {
auto src_node = NodeUtils::GetInDataNodeByIndex(*ge_node, 0); // cond input auto src_node = NodeUtils::GetInDataNodeByIndex(*ge_node, 0); // cond input
@@ -346,6 +361,7 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s
auto src_node_item = MutableNodeItem(src_node); auto src_node_item = MutableNodeItem(src_node);
src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx()); src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx());
dependent_for_shape_inference.emplace(src_node); dependent_for_shape_inference.emplace(src_node);
host_input_value_dependencies_[&node_item].emplace_back(peer_out_anchor->GetIdx(), src_node_item);
GELOGD("[%s] Dependent added from output of [%s:%d]", GELOGD("[%s] Dependent added from output of [%s:%d]",
node_item.NodeName().c_str(), node_item.NodeName().c_str(),
src_node_item->NodeName().c_str(), src_node_item->NodeName().c_str(),
@@ -1494,7 +1510,7 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) {
src_node->GetName().c_str(), src_node->GetName().c_str(),
src_op_type.c_str()); src_op_type.c_str());


if (src_op_type != CONSTANTOP && src_op_type != VARIABLE) {
if (src_op_type != CONSTANTOP && src_op_type != CONSTANT && src_op_type != VARIABLE) {
continue; continue;
} }


@@ -1503,6 +1519,9 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) {
GELOGD("Got parent output index = %u", parent_index); GELOGD("Got parent output index = %u", parent_index);
GE_CHECK_LE(parent_index, INT32_MAX); GE_CHECK_LE(parent_index, INT32_MAX);
node_item.ref_outputs.emplace(static_cast<int>(parent_index), src_node); node_item.ref_outputs.emplace(static_cast<int>(parent_index), src_node);
if (src_op_type == CONSTANTOP || src_op_type == CONSTANT) {
known_subgraph_constant_output_refs_[&node_item].emplace(parent_index, src_node);
}
} }


// Data nodes marked with REF_VAR_SRC_VAR_NAME // Data nodes marked with REF_VAR_SRC_VAR_NAME
@@ -1568,6 +1587,10 @@ Status HybridModelBuilder::InitModelMem() {
} }


runtime_param_.var_base = hybrid_model_.var_mem_base_; runtime_param_.var_base = hybrid_model_.var_mem_base_;
auto allocator = NpuMemoryAllocator::GetAllocator();
GE_CHECK_NOTNULL(allocator);
hybrid_model_.global_step_ = TensorBuffer::Create(allocator, sizeof(int64_t));
GE_CHECK_NOTNULL(hybrid_model_.global_step_);
return SUCCESS; return SUCCESS;
} }


@@ -2044,8 +2067,9 @@ Status HybridModelBuilder::CollectParallelGroups(NodeItem *node_item) {
const auto &node = node_item->node; const auto &node = node_item->node;
auto executor_type = NodeExecutorManager::GetInstance().ResolveExecutorType(*node); auto executor_type = NodeExecutorManager::GetInstance().ResolveExecutorType(*node);
if (executor_type == NodeExecutorManager::ExecutorType::HCCL) { if (executor_type == NodeExecutorManager::ExecutorType::HCCL) {
std::string parallel_group;
if (AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, parallel_group)) {
int64_t parallel_group_val = -1;
if (AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, parallel_group_val)) {
std::string parallel_group = std::to_string(parallel_group_val);
GELOGD("[%s] Got parallel group = [%s]", node_item->NodeName().c_str(), parallel_group.c_str()); GELOGD("[%s] Got parallel group = [%s]", node_item->NodeName().c_str(), parallel_group.c_str());
parallel_group_to_nodes_[parallel_group].emplace(node_item); parallel_group_to_nodes_[parallel_group].emplace(node_item);
std::set<std::string> group{parallel_group}; std::set<std::string> group{parallel_group};
@@ -2061,8 +2085,9 @@ Status HybridModelBuilder::CollectParallelGroups(NodeItem *node_item) {
auto subgraph = hybrid_model_.root_graph_->GetSubgraph(subgraph_name); auto subgraph = hybrid_model_.root_graph_->GetSubgraph(subgraph_name);
GE_CHECK_NOTNULL(subgraph); GE_CHECK_NOTNULL(subgraph);
for (const auto &sub_node : subgraph->GetAllNodes()) { for (const auto &sub_node : subgraph->GetAllNodes()) {
std::string parallel_group;
if (AttrUtils::GetStr(sub_node->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, parallel_group)) {
int64_t parallel_group_val = -1;
if (AttrUtils::GetInt(sub_node->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, parallel_group_val)) {
std::string parallel_group = std::to_string(parallel_group_val);
GELOGD("[%s::%s] Got parallel group = %s", GELOGD("[%s::%s] Got parallel group = %s",
subgraph_name.c_str(), subgraph_name.c_str(),
sub_node->GetName().c_str(), sub_node->GetName().c_str(),
@@ -2127,5 +2152,88 @@ Status HybridModelBuilder::ParseDependentByParallelGroup() {
} }
return SUCCESS; return SUCCESS;
} }

// Drops shape-inference dependencies whose source values are compile-time
// constants: each such value is copied to a host tensor once (via
// Convert2HostTensor), after which the consumer no longer needs the producing
// node at execution time.
Status HybridModelBuilder::OptimizeDependenciesForConstantInputs() {
  // (constant node) -> set of output indices already converted to host
  // tensors, so a constant shared by multiple consumers is copied only once.
  std::map<NodePtr, std::set<uint32_t>> converted;
  for (auto &it : host_input_value_dependencies_) {
    auto node_item = it.first;
    // Per-source reference count; a source whose count drops to zero is
    // removed from this node's dependents_for_shape_inference below.
    std::map<NodeItem *, int> ref_counts;
    bool changed = false;
    for (auto output_idx_and_node : it.second) {
      auto output_idx = output_idx_and_node.first;
      auto src_node_item = output_idx_and_node.second;
      ++ref_counts[src_node_item];
      NodePtr constant_node;
      if (src_node_item->node_type == CONSTANT || src_node_item->node_type == CONSTANTOP) {
        // Direct dependency on a Constant/ConstantOp node.
        constant_node = src_node_item->node;
        GELOGD("src node [%s] is a constant", src_node_item->NodeName().c_str());
      } else {
        // Otherwise the source may be a known-shaped subgraph whose output
        // refs a constant (recorded in known_subgraph_constant_output_refs_).
        auto iter = known_subgraph_constant_output_refs_.find(src_node_item);
        if (iter != known_subgraph_constant_output_refs_.end()) {
          // NOTE(review): operator[] inserts a null NodePtr when output_idx is
          // absent; the nullptr check below treats that as "not a constant".
          constant_node = iter->second[output_idx];
          if (constant_node != nullptr) {
            GELOGD("Output[%u] of subgraph [%s] is a constant", output_idx, src_node_item->NodeName().c_str());
          }
        }
      }

      if (constant_node == nullptr) {
        // Not backed by a constant; the runtime dependency must be kept.
        GELOGD("Output[%u] of [%s] is not a constant", output_idx, src_node_item->NodeName().c_str());
        continue;
      }

      // Copy the constant's value to host memory, at most once per
      // (constant, output index) pair.
      if (converted[constant_node].count(output_idx) == 0) {
        GE_CHK_STATUS_RET(Convert2HostTensor(constant_node, src_node_item->node_id, output_idx),
                          "[%s] Failed to convert constant to host tensor", constant_node->GetName().c_str());
        converted[constant_node].emplace(output_idx);
      }

      // The consumer can now read the value from the host tensor, so the
      // device-side dependency on this output is no longer required.
      src_node_item->to_const_output_id_list.erase(output_idx);
      --ref_counts[src_node_item];
      changed = true;
    }

    if (changed) {
      // Rebuild the dependency list, keeping only sources that still have at
      // least one non-constant output this node depends on.
      std::vector<NodePtr> depends_to_keep;
      for (auto &ref_count_it : ref_counts) {
        if (ref_count_it.second == 0) {
          GELOGD("[%s] no longer depends on [%s] for shape inference",
                 node_item->NodeName().c_str(),
                 ref_count_it.first->NodeName().c_str());
        } else {
          depends_to_keep.emplace_back(ref_count_it.first->node);
        }
      }
      node_item->dependents_for_shape_inference.swap(depends_to_keep);
    }
  }

  return SUCCESS;
}
// Copies the value of a constant node from device memory into a host Tensor
// and stores it in hybrid_model_.host_tensors_ keyed by the consumer's
// node_id, tagged with output_idx (the output slot the consumer reads).
Status HybridModelBuilder::Convert2HostTensor(const NodePtr &node, int node_id, uint32_t output_idx) {
  auto tensor_value = hybrid_model_.GetTensor(node);
  GE_CHECK_NOTNULL(tensor_value);
  // Output desc 0 is used unconditionally -- assumes `node` is a constant with
  // a single output (output_idx only names the consumer-side slot); TODO
  // confirm against callers.
  auto tensor_desc = node->GetOpDesc()->MutableOutputDesc(0);
  GE_CHECK_NOTNULL(tensor_desc);
  Tensor tensor(TensorAdapter::GeTensorDesc2TensorDesc(*tensor_desc));
  int64_t tensor_size = -1;
  GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorSizeInBytes(*tensor_desc, tensor_size),
                          "[%s] Failed to get tensor size", node->GetName().c_str());
  // A non-positive size yields an empty host tensor (no device copy).
  if (tensor_size > 0) {
    auto copy_size = static_cast<size_t>(tensor_size);
    // The device buffer must be at least as large as the reported size.
    GE_CHECK_GE(tensor_value->GetSize(), copy_size);
    std::vector<uint8_t> buffer(copy_size);
    GE_CHK_RT_RET(rtMemcpy(buffer.data(),
                           copy_size,
                           tensor_value->GetData(),
                           copy_size,
                           RT_MEMCPY_DEVICE_TO_HOST));
    tensor.SetData(std::move(buffer));
    GELOGD("[%s] Copy constant tensor to host successfully, size = %zu", node->GetName().c_str(), copy_size);
  }

  hybrid_model_.host_tensors_[node_id].emplace_back(output_idx, std::move(tensor));
  return SUCCESS;
}
} // namespace hybrid } // namespace hybrid
} // namespace ge } // namespace ge

+ 8
- 0
ge/hybrid/model/hybrid_model_builder.h View File

@@ -91,6 +91,8 @@ class HybridModelBuilder {
Status GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list); Status GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
Status GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list); Status GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
Status GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id, vector<domi::TaskDef> &task_def_list); Status GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id, vector<domi::TaskDef> &task_def_list);
Status OptimizeDependenciesForConstantInputs();
Status Convert2HostTensor(const NodePtr &node, int node_id, uint32_t output_idx);


const char* GetGraphName() const { const char* GetGraphName() const {
return hybrid_model_.model_name_.c_str(); return hybrid_model_.model_name_.c_str();
@@ -110,6 +112,12 @@ class HybridModelBuilder {


RuntimeParam &runtime_param_; RuntimeParam &runtime_param_;
VarManager *var_manager_ = nullptr; VarManager *var_manager_ = nullptr;

// map<known_node_item, map<output_idx, constant_node>>
std::map<NodeItem *, std::map<uint32_t, NodePtr>> known_subgraph_constant_output_refs_;

// map<dst_node_item, vector<output_idx, src_node_item>>
std::map<NodeItem *, std::vector<std::pair<uint32_t, NodeItem *>>> host_input_value_dependencies_;
}; };
} // namespace hybrid } // namespace hybrid
} // namespace ge } // namespace ge


+ 43
- 14
ge/hybrid/node_executor/aicore/aicore_op_task.cc View File

@@ -71,22 +71,22 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def)
} }


Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
auto op_desc_ptr = std::make_shared<OpDesc>(op_desc);
GE_CHECK_NOTNULL(op_desc_ptr);
auto tbe_kernel = op_desc_ptr->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
if (tbe_kernel == nullptr) {
GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str());
return INTERNAL_ERROR;
}
TBEHandleStore &kernel_store = TBEHandleStore::GetInstance();
rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str());
if (rt_ret != RT_ERROR_NONE || is_single_op_) { if (rt_ret != RT_ERROR_NONE || is_single_op_) {
auto op_desc_ptr = MakeShared<OpDesc>(op_desc);
GE_CHECK_NOTNULL(op_desc_ptr);
auto tbe_kernel = op_desc_ptr->TryGetExtAttr(GetKeyForTbeKernel(), TBEKernelPtr());
if (tbe_kernel == nullptr) {
GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str());
return INTERNAL_ERROR;
}
TBEHandleStore &kernel_store = TBEHandleStore::GetInstance();
void *bin_handle = nullptr; void *bin_handle = nullptr;
if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) {
GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str()); GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str());
rtDevBinary_t binary; rtDevBinary_t binary;
std::string json_string; std::string json_string;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string),
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForTvmMagic(), json_string),
GELOGI("Get original type of session_graph_id.")); GELOGI("Get original type of session_graph_id."));
if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU;
@@ -104,7 +104,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
GELOGI("TBE: binary.length: %lu", binary.length); GELOGI("TBE: binary.length: %lu", binary.length);
GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle)); GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle));
std::string meta_data; std::string meta_data;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_METADATA, meta_data),
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForTvmMetaData(), meta_data),
GELOGI("Get original type of json_string")); GELOGI("Get original type of json_string"));
GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str());
GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str())));
@@ -114,7 +114,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
kernel_store.ReferTBEHandle(stub_name_.c_str()); kernel_store.ReferTBEHandle(stub_name_.c_str());
} }
std::string kernel_name; std::string kernel_name;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, op_desc_ptr->GetName() + "_kernelname", kernel_name),
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForKernelName(op_desc), kernel_name),
GELOGI("Get original type of kernel_name")); GELOGI("Get original type of kernel_name"));
GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str()); GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str());
GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), stub_name_.c_str(), kernel_name.c_str(), 0)); GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), stub_name_.c_str(), kernel_name.c_str(), 0));
@@ -349,9 +349,6 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info)
GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info), GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info),
"Failed calc tiling data of node %s.", "Failed calc tiling data of node %s.",
node->GetName().c_str()); node->GetName().c_str());
if (is_single_op_) {
tiling_info.clear_atomic = false;
}
GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str()); GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str());
return SUCCESS; return SUCCESS;
} }
@@ -468,6 +465,22 @@ std::string AiCoreOpTask::GetKeyForOpParamSize() const {
return kAttrOpParamSize; return kAttrOpParamSize;
} }


// Attr key under which the TBE kernel binary is stored on the OpDesc
// (regular kernel; AtomicAddrCleanOpTask overrides this with the atomic key).
std::string AiCoreOpTask::GetKeyForTbeKernel() const {
  return OP_EXTATTR_NAME_TBE_KERNEL;
}

// Attr key for the TVM "magic" string that selects the rtDevBinary_t magic
// value during kernel registration.
std::string AiCoreOpTask::GetKeyForTvmMagic() const {
  return TVM_ATTR_NAME_MAGIC;
}

// Attr key for the TVM metadata registered via rtMetadataRegister.
std::string AiCoreOpTask::GetKeyForTvmMetaData() const {
  return TVM_ATTR_NAME_METADATA;
}

// Attr key holding the kernel function name; derived from the op's own name
// with the "_kernelname" suffix.
std::string AiCoreOpTask::GetKeyForKernelName(const OpDesc &op_desc) const {
  std::string attr_key = op_desc.GetName();
  attr_key += "_kernelname";
  return attr_key;
}

Status AtomicAddrCleanOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { Status AtomicAddrCleanOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) {
GE_CHK_STATUS_RET_NOLOG(AiCoreOpTask::Init(op_desc, task_def)); GE_CHK_STATUS_RET_NOLOG(AiCoreOpTask::Init(op_desc, task_def));
return InitAtomicAddrCleanIndices(op_desc); return InitAtomicAddrCleanIndices(op_desc);
@@ -524,6 +537,22 @@ std::string AtomicAddrCleanOpTask::GetKeyForOpParamSize() const {
return kAttrAtomicOpParamSize; return kAttrAtomicOpParamSize;
} }


// Attr key for the atomic-clean variant of the TBE kernel binary.
std::string AtomicAddrCleanOpTask::GetKeyForTbeKernel() const {
  return EXT_ATTR_ATOMIC_TBE_KERNEL;
}

// Attr key for the atomic-clean kernel's TVM magic string.
std::string AtomicAddrCleanOpTask::GetKeyForTvmMagic() const {
  return ATOMIC_ATTR_TVM_MAGIC;
}

// Attr key for the atomic-clean kernel's TVM metadata.
std::string AtomicAddrCleanOpTask::GetKeyForTvmMetaData() const {
  return ATOMIC_ATTR_TVM_METADATA;
}

// Attr key holding the atomic-clean kernel function name; derived from the
// op's own name with the "_atomic_kernelname" suffix.
std::string AtomicAddrCleanOpTask::GetKeyForKernelName(const OpDesc &op_desc) const {
  std::string attr_key = op_desc.GetName();
  attr_key += "_atomic_kernelname";
  return attr_key;
}

Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) {
GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str());
GE_CHK_STATUS_RET(OpAtomicCalculate(*node, tiling_info), GE_CHK_STATUS_RET(OpAtomicCalculate(*node, tiling_info),


+ 8
- 0
ge/hybrid/node_executor/aicore/aicore_op_task.h View File

@@ -81,6 +81,10 @@ class AiCoreOpTask {
protected: protected:
Status UpdateTilingInfo(TaskContext &context); Status UpdateTilingInfo(TaskContext &context);
virtual std::string GetKeyForOpParamSize() const; virtual std::string GetKeyForOpParamSize() const;
virtual std::string GetKeyForTbeKernel() const;
virtual std::string GetKeyForTvmMagic() const;
virtual std::string GetKeyForTvmMetaData() const;
virtual std::string GetKeyForKernelName(const OpDesc &op_desc) const;
virtual Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info); virtual Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info);


std::unique_ptr<TensorBuffer> tiling_buffer_ = nullptr; std::unique_ptr<TensorBuffer> tiling_buffer_ = nullptr;
@@ -119,6 +123,10 @@ class AtomicAddrCleanOpTask : public AiCoreOpTask {


protected: protected:
std::string GetKeyForOpParamSize() const override; std::string GetKeyForOpParamSize() const override;
std::string GetKeyForTbeKernel() const override;
std::string GetKeyForTvmMagic() const override;
std::string GetKeyForTvmMetaData() const override;
std::string GetKeyForKernelName(const OpDesc &op_desc) const override;
Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info) override; Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info) override;


private: private:


+ 1
- 0
ge/hybrid/node_executor/aicore/aicore_task_builder.cc View File

@@ -70,6 +70,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<AiCoreNodeTask> &node_task,
auto atomic_task = auto atomic_task =
std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask()); std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask());
GE_CHECK_NOTNULL(atomic_task); GE_CHECK_NOTNULL(atomic_task);
atomic_task->SetSingleOp(is_single_op);
GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()),
"[%s] Failed to init task for AtomicAddrClean", "[%s] Failed to init task for AtomicAddrClean",
op_desc_->GetName().c_str()); op_desc_->GetName().c_str());


+ 27
- 15
ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc View File

@@ -18,6 +18,7 @@
#include "cce/aicpu_engine_struct.h" #include "cce/aicpu_engine_struct.h"
#include "framework/common/debug/ge_log.h" #include "framework/common/debug/ge_log.h"
#include "framework/common/fmk_error_codes.h" #include "framework/common/fmk_error_codes.h"
#include "common/dump/dump_manager.h"
#include "common/ge/ge_util.h" #include "common/ge/ge_util.h"
#include "graph/attr_value.h" #include "graph/attr_value.h"
#include "graph/debug/ge_attr_define.h" #include "graph/debug/ge_attr_define.h"
@@ -110,15 +111,6 @@ Status KnownNodeTask::Init(TaskContext &context) {
GELOGI("KnownNodeTask::Init mem base is %p, size %lu.", GELOGI("KnownNodeTask::Init mem base is %p, size %lu.",
davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size); davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size);
} }
if (!load_flag_) {
auto dump_properties = context.GetDumpProperties();
if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) {
davinci_model_->SetDumpProperties(dump_properties);
void *global_step = context.GetExecutionContext()->global_step;
davinci_model_->SetKnownShapeGlobalStep(global_step);
}
load_flag_ = true;
}
GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(), GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(),
davinci_model_->Id(), davinci_model_->SubModelId()), davinci_model_->Id(), davinci_model_->SubModelId()),
"KnownNodeTask::Init destroy aicpu kernel failed."); "KnownNodeTask::Init destroy aicpu kernel failed.");
@@ -126,20 +118,35 @@ Status KnownNodeTask::Init(TaskContext &context) {
return SUCCESS; return SUCCESS;
} }


Status KnownNodeTask::InitDavinciModel() {
GELOGD("[Init][Model] start");
Status KnownNodeTask::InitDavinciModel(const HybridModel &model, TensorBuffer *weight_buffer) {
GELOGD("[Init][DavinciModel] start");
davinci_model_->InitRuntimeParams(); davinci_model_->InitRuntimeParams();
GE_CHK_STATUS_RET(davinci_model_->InitVariableMem(), "init variable mem failed"); GE_CHK_STATUS_RET(davinci_model_->InitVariableMem(), "init variable mem failed");
int32_t device_id = 0; int32_t device_id = 0;
GE_CHK_RT_RET(rtGetDevice(&device_id)); GE_CHK_RT_RET(rtGetDevice(&device_id));
davinci_model_->SetDeviceId(static_cast<uint32_t>(device_id)); davinci_model_->SetDeviceId(static_cast<uint32_t>(device_id));
GE_CHK_STATUS_RET(DoInitDavinciModel(), "[Init][Model] Failed to init davinci model.");

auto dump_properties = DumpManager::GetInstance().GetDumpProperties(model.GetSessionId());
if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) {
davinci_model_->SetDumpProperties(dump_properties);
void *global_step = model.GetGlobalStep();
davinci_model_->SetKnownShapeGlobalStep(global_step);
}

void *weight = nullptr;
size_t weight_size = 0;
if (weight_buffer != nullptr) {
weight = weight_buffer->GetData();
weight_size = weight_buffer->GetSize();
}
GELOGD("Start to init davinci model, weight size = %zu", weight_size);
GE_CHK_STATUS_RET(DoInitDavinciModel(weight, weight_size), "[Init][Model] Failed to init davinci model.");
GELOGD("[Init][Model] success"); GELOGD("[Init][Model] success");
return SUCCESS; return SUCCESS;
} }


Status KnownNodeTask::DoInitDavinciModel() {
return davinci_model_->Init();
Status KnownNodeTask::DoInitDavinciModel(void *weight, size_t weight_size) {
return davinci_model_->Init(nullptr, 0, weight, weight_size);
} }


Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const {
@@ -165,12 +172,17 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node
const GeModelPtr ge_model = model.GetGeModel(node); const GeModelPtr ge_model = model.GetGeModel(node);
GE_CHECK_NOTNULL(ge_model); GE_CHECK_NOTNULL(ge_model);


AscendString graph_name;
GE_CHK_GRAPH_STATUS_RET(ge_model->GetGraph().GetName(graph_name), "Failed to get graph name");
auto weight_buffer = model.GetModelWeight(graph_name.GetString());

std::shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(0, nullptr); std::shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(0, nullptr);
GE_CHECK_NOTNULL(davinci_model); GE_CHECK_NOTNULL(davinci_model);


// set known node flag as true // set known node flag as true
davinci_model->SetKnownNode(true); davinci_model->SetKnownNode(true);
davinci_model->SetId(model.GetModelId()); davinci_model->SetId(model.GetModelId());
davinci_model->SetDumpModelName(model.GetModelName());
davinci_model->SetOmName(model.GetOmName()); davinci_model->SetOmName(model.GetOmName());
// set model id as root node's node id // set model id as root node's node id
davinci_model->SetSubModelId(node->GetOpDesc()->GetId()); davinci_model->SetSubModelId(node->GetOpDesc()->GetId());
@@ -180,7 +192,7 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node


auto known_node_task = MakeShared<KnownNodeTask>(davinci_model); auto known_node_task = MakeShared<KnownNodeTask>(davinci_model);
GE_CHECK_NOTNULL(known_node_task); GE_CHECK_NOTNULL(known_node_task);
GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel());
GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel(model, weight_buffer));
GELOGI("[%s] KnownNodeExecutor::LoadTask success.", node->GetName().c_str()); GELOGI("[%s] KnownNodeExecutor::LoadTask success.", node->GetName().c_str());
task = std::move(known_node_task); task = std::move(known_node_task);
return SUCCESS; return SUCCESS;


+ 2
- 3
ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h View File

@@ -36,13 +36,12 @@ class KnownNodeTask : public NodeTask {
Status UpdateArgs(TaskContext &context) override; Status UpdateArgs(TaskContext &context) override;
Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override; Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;
Status Init(TaskContext &context) override; Status Init(TaskContext &context) override;
Status InitDavinciModel();
Status InitDavinciModel(const HybridModel &model, TensorBuffer *weight_buffer);


protected: protected:
virtual Status DoInitDavinciModel();
virtual Status DoInitDavinciModel(void *weight, size_t weight_size);
private: private:
std::shared_ptr<DavinciModel> davinci_model_ = nullptr; std::shared_ptr<DavinciModel> davinci_model_ = nullptr;
bool load_flag_ = false;
}; };


class KnownNodeExecutor : public NodeExecutor { class KnownNodeExecutor : public NodeExecutor {


+ 3
- 3
ge/single_op/single_op_model.cc View File

@@ -127,7 +127,7 @@ void SingleOpModel::ParseOpModelParams(ModelHelper &model_helper, SingleOpModelP
ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_CORE_TYPE, value); ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_CORE_TYPE, value);
param.core_type = ret ? value : 0; param.core_type = ret ? value : 0;


GELOGI("ParseOpModelParams(), total_memory_size:%lu, zero_copy_size:%lu, weight_size:%lu. core_type = %lu",
GELOGI("ParseOpModelParams(), total_memory_size:%lu, zero_copy_size:%lu, weight_size:%lu, core_type = %lu",
param.memory_size, param.zero_copy_mem_size, param.weight_size, param.core_type); param.memory_size, param.zero_copy_mem_size, param.weight_size, param.core_type);
} }


@@ -454,7 +454,7 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl


auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type == ccKernelType::TE) { if (kernel_type == ccKernelType::TE) {
GELOGD("Building TBE task");
GELOGD("Building TBE task.");
TbeOpTask *tbe_task = nullptr; TbeOpTask *tbe_task = nullptr;
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task));
tbe_task->SetModelArgs(model_name_, model_id_); tbe_task->SetModelArgs(model_name_, model_id_);
@@ -482,7 +482,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
auto tasks = ge_model->GetModelTaskDefPtr()->task(); auto tasks = ge_model->GetModelTaskDefPtr()->task();
for (int i = 0; i < tasks.size(); ++i) { for (int i = 0; i < tasks.size(); ++i) {
const TaskDef &task_def = tasks[i]; const TaskDef &task_def = tasks[i];
GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(),
GELOGI("[%s] Task[%d], type = [%u], DebugString = [%s]", model_name_.c_str(), i, task_def.type(),
task_def.DebugString().c_str()); task_def.DebugString().c_str());
auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {


+ 11
- 3
ge/single_op/task/op_task.cc View File

@@ -121,7 +121,7 @@ Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id
} }
GE_CHECK_NOTNULL(op_desc_); GE_CHECK_NOTNULL(op_desc_);
string op_name = op_desc_->GetName(); string op_name = op_desc_->GetName();
GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u]", op_name.c_str(), task_id, stream_id);
GELOGD("Get profiling args of op [%s] end, task_id[%u], stream_id[%u].", op_name.c_str(), task_id, stream_id);
model_id = model_id_; model_id = model_id_;
task_desc_info.model_name = model_name_; task_desc_info.model_name = model_name_;
task_desc_info.block_dim = block_dim_; task_desc_info.block_dim = block_dim_;
@@ -459,10 +459,14 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc,
continue; continue;
} }
GE_CHK_BOOL_RET_STATUS(non_const_index < input_desc.size(), ACL_ERROR_GE_PARAM_INVALID, GE_CHK_BOOL_RET_STATUS(non_const_index < input_desc.size(), ACL_ERROR_GE_PARAM_INVALID,
"Input_desc size is %zu, but get non_const_index is %zu",
input_desc.size(), non_const_index);
"Input_desc size is %zu, but get non_const_index is %zu", input_desc.size(),
non_const_index);
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateInputShapeAndType(input_index, input_desc[non_const_index]), GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateInputShapeAndType(input_index, input_desc[non_const_index]),
"Input[%zu] update input shape failed.", input_index); "Input[%zu] update input shape failed.", input_index);
if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) {
GE_CHK_STATUS_RET(op_desc_->UpdateInputDesc(input_index, input_desc[non_const_index]),
"AicpuTask Update [%zu]th input desc failed", input_index);
}
non_const_index++; non_const_index++;
} }


@@ -470,6 +474,10 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc,
for (size_t j = 0; j < num_outputs_; ++j) { for (size_t j = 0; j < num_outputs_; ++j) {
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateOutputShapeAndType(j, output_desc[j]), GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateOutputShapeAndType(j, output_desc[j]),
"Output[%zu] UpdateOutputShapeAndType failed.", j); "Output[%zu] UpdateOutputShapeAndType failed.", j);
if (DumpManager::GetInstance().GetDumpProperties(kInferSessionId).IsSingleOpNeedDump()) {
GE_CHK_STATUS_RET(op_desc_->UpdateOutputDesc(j, output_desc[j]), "AicpuTask Update [%zu]th output desc failed",
j);
}
} }
} }




+ 1
- 0
inc/framework/generator/ge_generator.h View File

@@ -98,6 +98,7 @@ class GE_FUNC_VISIBILITY GeGenerator {
Status BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, Status BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs,
const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff,
bool is_offline = true); bool is_offline = true);
void RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs);
Status CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs); Status CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs);


using GeRootModelPtr = std::shared_ptr<ge::GeRootModel>; using GeRootModelPtr = std::shared_ptr<ge::GeRootModel>;


+ 1
- 1
metadef

@@ -1 +1 @@
Subproject commit 8cf3c51d53a9f4ebd6d601a2383f62788e3b8176
Subproject commit 7aa912ab473b780c3d2f9c907760e4cb32dc0fb6

+ 1
- 1
parser

@@ -1 +1 @@
Subproject commit d851e1d467768b6cefd8f5f44745be1c5312121a
Subproject commit d4587c1c33d2d50ef157bbc0449484a196e91429

+ 2
- 0
tests/ut/ge/CMakeLists.txt View File

@@ -166,6 +166,7 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/common/helper/model_helper.cc" "${GE_CODE_DIR}/ge/common/helper/model_helper.cc"
"${GE_CODE_DIR}/ge/common/dump/dump_manager.cc" "${GE_CODE_DIR}/ge/common/dump/dump_manager.cc"
"${GE_CODE_DIR}/ge/common/dump/opdebug_register.cc" "${GE_CODE_DIR}/ge/common/dump/opdebug_register.cc"
"${GE_CODE_DIR}/ge/common/dump/dump_op.cc"
"${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc"
"${GE_CODE_DIR}/ge/model/ge_root_model.cc" "${GE_CODE_DIR}/ge/model/ge_root_model.cc"
"${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc"
@@ -742,6 +743,7 @@ set(MULTI_PARTS_TEST_FILES
"graph/transop_util_unittest.cc" "graph/transop_util_unittest.cc"
"common/datatype_transfer_unittest.cc" "common/datatype_transfer_unittest.cc"
"common/dump_manager_unittest.cc" "common/dump_manager_unittest.cc"
"common/dump_op_unittest.cc"
"common/opdebug_register_unittest.cc" "common/opdebug_register_unittest.cc"
"common/format_transfer_unittest.cc" "common/format_transfer_unittest.cc"
"common/format_transfer_transpose_unittest.cc" "common/format_transfer_transpose_unittest.cc"


+ 61
- 0
tests/ut/ge/common/dump_op_unittest.cc View File

@@ -0,0 +1,61 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>

#define protected public
#define private public
#include "common/dump/dump_op.h"
#include "common/debug/log.h"
#include "common/ge_inner_error_codes.h"
#include "common/dump/dump_properties.h"
#undef private
#undef protected

namespace ge {
// Test fixture for DumpOp; no shared state is required, so SetUp/TearDown are
// intentionally empty.
class UTEST_dump_op : public testing::Test {
 protected:
  void SetUp() {}
  void TearDown() {}
};

// LaunchDumpOp should succeed when dump is enabled and the dynamic model name
// ("model1") matches an entry in the dump properties' model map.
TEST_F(UTEST_dump_op, launch_dump_op_success) {
  DumpOp dump_op;
  DumpProperties dump_properties;
  OpDescPtr op_desc = std::make_shared<OpDesc>("GatherV2", "GatherV2");
  std::set<std::string> temp;  // empty per-model layer set
  dump_properties.model_dump_properties_map_.emplace("model1", temp);
  dump_properties.enable_dump_ = "1";
  dump_op.SetDynamicModelInfo("model1", "model2", 1);
  dump_op.SetDumpInfo(dump_properties, op_desc, {}, {}, nullptr);
  auto ret = dump_op.LaunchDumpOp();
  EXPECT_EQ(ret, ge::SUCCESS);
}

// LaunchDumpOp should still succeed when the dynamic model name does NOT
// match any entry in the dump properties' model map.
// NOTE(review): "modle2" looks like a typo but is presumably intentional to
// exercise the non-matching-name path (fixing it would make this test a
// duplicate of launch_dump_op_success) -- confirm with the author.
TEST_F(UTEST_dump_op, launch_dump_op_success_2) {
  DumpOp dump_op;
  DumpProperties dump_properties;
  OpDescPtr op_desc = std::make_shared<OpDesc>("GatherV2", "GatherV2");
  std::set<std::string> temp;  // empty per-model layer set
  dump_properties.model_dump_properties_map_.emplace("model1", temp);
  dump_properties.enable_dump_ = "1";
  dump_op.SetDynamicModelInfo("modle2", "model2", 1);
  dump_op.SetDumpInfo(dump_properties, op_desc, {}, {}, nullptr);
  auto ret = dump_op.LaunchDumpOp();
  EXPECT_EQ(ret, ge::SUCCESS);
}

} // namespace ge

+ 33
- 2
tests/ut/ge/graph/passes/atomic_addr_clean_pass_unittest.cc View File

@@ -48,18 +48,49 @@ public:
return node; return node;
} }


int CountOfAtomicCleanNode() {
int node_num = 0;
for (NodePtr &node : graph_->GetDirectNode()) {
if (node->GetType() == ATOMICADDRCLEAN) {
++node_num;
}
}
return node_num;
}

ComputeGraphPtr graph_; ComputeGraphPtr graph_;
}; };


// node1 -> node2 -> node3
/*
* Data Data Atomic_clean
* | | / |
* relu relu |
* | ==> | |
* relu(atomic) relu(atomic)
* | |
* netoutput netoutput
*/
TEST_F(UtestGraphPassesAtomicAddrCleanPass, pass_run_success) { TEST_F(UtestGraphPassesAtomicAddrCleanPass, pass_run_success) {
auto node1 = NewNode("node1", DATA, 0, 1); auto node1 = NewNode("node1", DATA, 0, 1);

auto node2 = NewNode("node2", RELU, 1, 1); auto node2 = NewNode("node2", RELU, 1, 1);
auto node3 = NewNode("node3", NETOUTPUT, 1, 0);
auto node3 = NewNode("node3", RELU, 1, 1);
auto op_desc = node3->GetOpDesc();
vector<int64_t> atomic_input_index = {123, 456};
AttrUtils::SetListInt(op_desc, "atomic_input_index", atomic_input_index);

auto node4 = NewNode("node4", NETOUTPUT, 1, 0);
GraphUtils::AddEdge(node1->GetOutDataAnchor(0), node2->GetInDataAnchor(0)); GraphUtils::AddEdge(node1->GetOutDataAnchor(0), node2->GetInDataAnchor(0));
GraphUtils::AddEdge(node2->GetOutDataAnchor(0), node3->GetInDataAnchor(0)); GraphUtils::AddEdge(node2->GetOutDataAnchor(0), node3->GetInDataAnchor(0));
GraphUtils::AddEdge(node3->GetOutDataAnchor(0), node4->GetInDataAnchor(0));
AtomicAddrCleanPass atomi_addr_clean_pass; AtomicAddrCleanPass atomi_addr_clean_pass;
Status ret = atomi_addr_clean_pass.Run(graph_); Status ret = atomi_addr_clean_pass.Run(graph_);
EXPECT_EQ(ret, SUCCESS); EXPECT_EQ(ret, SUCCESS);
EXPECT_EQ(1, CountOfAtomicCleanNode());
auto atomic_clean = graph_->FindNode("atomic_addr_clean");
EXPECT_NE(atomic_clean, nullptr);
auto out_ctrl_nodes = atomic_clean->GetOutControlNodes();
EXPECT_EQ(out_ctrl_nodes.size(), 2);
} }
} // namespace ge } // namespace ge

Loading…
Cancel
Save