
Merge branch 'development' of gitee.com:dong-duo/graphengine into development

tags/v1.1.0
dongduo 4 years ago
parent commit a1fc89acfd
40 changed files with 727 additions and 338 deletions
  1. ge/common/profiling/profiling_manager.cc (+117 -8)
  2. ge/common/profiling/profiling_manager.h (+16 -4)
  3. ge/common/types.cc (+1 -0)
  4. ge/executor/ge_executor.cc (+13 -0)
  5. ge/graph/build/label_allocator.cc (+21 -10)
  6. ge/graph/build/memory/block_mem_assigner.cc (+9 -0)
  7. ge/graph/build/model_builder.cc (+10 -2)
  8. ge/graph/common/transop_util.cc (+9 -0)
  9. ge/graph/common/transop_util.h (+2 -0)
  10. ge/graph/load/new_model_manager/data_dumper.h (+1 -0)
  11. ge/graph/load/new_model_manager/davinci_model.cc (+70 -87)
  12. ge/graph/load/new_model_manager/davinci_model.h (+3 -2)
  13. ge/graph/load/new_model_manager/model_manager.cc (+82 -11)
  14. ge/graph/load/new_model_manager/model_manager.h (+5 -0)
  15. ge/graph/load/new_model_manager/zero_copy_task.cc (+1 -1)
  16. ge/graph/load/new_model_manager/zero_copy_task.h (+1 -1)
  17. ge/graph/manager/graph_manager.cc (+22 -0)
  18. ge/graph/passes/memcpy_addr_async_pass.cc (+6 -1)
  19. ge/graph/passes/net_output_pass.cc (+7 -1)
  20. ge/graph/passes/net_output_pass.h (+1 -0)
  21. ge/graph/preprocess/graph_preprocess.cc (+57 -6)
  22. ge/graph/preprocess/insert_op/ge_aipp_op.cc (+88 -97)
  23. ge/graph/preprocess/insert_op/util_insert_aipp_op.cc (+24 -30)
  24. ge/graph/preprocess/multi_batch_copy_graph.cc (+9 -0)
  25. ge/graph/preprocess/multi_batch_options.cc (+4 -0)
  26. ge/host_kernels/slice_kernel.cc (+3 -1)
  27. ge/hybrid/executor/worker/execution_engine.cc (+3 -1)
  28. ge/hybrid/node_executor/aicore/aicore_node_executor.cc (+27 -15)
  29. ge/hybrid/node_executor/aicore/aicore_node_executor.h (+36 -3)
  30. ge/hybrid/node_executor/aicore/aicore_task_compiler.cc (+14 -2)
  31. ge/hybrid/node_executor/aicore/aicore_task_compiler.h (+5 -3)
  32. ge/init/gelib.cc (+6 -0)
  33. ge/offline/main.cc (+0 -20)
  34. ge/session/omg.cc (+4 -0)
  35. inc/external/ge/ge_api_types.h (+1 -0)
  36. inc/framework/common/debug/log.h (+30 -14)
  37. inc/framework/common/types.h (+5 -4)
  38. inc/framework/executor/ge_executor.h (+12 -12)
  39. metadef (+1 -1)
  40. parser (+1 -1)

ge/common/profiling/profiling_manager.cc (+117 -8)

@@ -21,6 +21,7 @@
#include "framework/common/string_util.h"
#include "graph/ge_context.h"
#include "runtime/base.h"
#include "graph/load/new_model_manager/davinci_model.h"

namespace {
const char *const kJobID = "jobID";
@@ -39,10 +40,12 @@ const std::string kConfigNumsdev = "devNums";
const std::string kConfigDevIdList = "devIdList";
const std::string kProfStart = "prof_start";
const std::string kProfStop = "prof_stop";
const std::string kProfModelSubscribe = "prof_model_subscribe";
const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
} // namespace

namespace ge {
ProfilingManager::ProfilingManager() {}
ProfilingManager::ProfilingManager() : subscribe_count_(0) {}

ProfilingManager::~ProfilingManager() {}

@@ -54,6 +57,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager &ProfilingMana
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options) {
#ifdef DAVINCI_SUPPORT_PROFILING
vector<int32_t>().swap(device_id_);
subscribe_count_ = 0;
job_id_ = options.job_id;

GELOGI("ProfilingManager::Init job_id:%s", job_id_.c_str());
@@ -382,7 +386,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo(
const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id) {
#ifdef DAVINCI_SUPPORT_PROFILING
Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter();
if (reporter == nullptr) {
@@ -401,7 +405,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
.append(op_name).append(" ")
.append(std::to_string(block_dim).append(" ")
.append(std::to_string(task_id)).append(" ")
.append(std::to_string(stream_id)).append("\n"));
.append(std::to_string(stream_id)).append(" ")
.append(std::to_string(model_id)).append("\n"));

Msprof::Engine::ReporterData reporter_data{};
reporter_data.deviceId = device_id;
@@ -425,7 +430,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo(
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) {
uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info, const int32_t &device_id) {
#ifdef DAVINCI_SUPPORT_PROFILING
Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter();
GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return;);
@@ -483,6 +488,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
data.append("\"");
}

data.append(" model_id:").append(std::to_string(model_id));

data.append("\n");

Msprof::Engine::ReporterData reporter_data{};
@@ -537,7 +544,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUn
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData(
const std::vector<TaskDescInfo> &task_desc_info, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) {
uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
bool check_device) {
#ifdef DAVINCI_SUPPORT_PROFILING
int32_t logic_device_id = 0;
rtError_t rt_ret = rtGetDevice(&logic_device_id);
@@ -546,7 +555,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr
return;
}
GELOGI("current logic_device_id:%d", logic_device_id);
if (!is_acl_api_mode_) {
if (check_device) {
auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id);
if (ret == device_id_.end()) {
GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed.");
@@ -554,9 +563,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr
}
}
GELOGI("start ProfilingTaskDescInfo.");
ProfilingTaskDescInfo(task_desc_info, logic_device_id);
ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id);
GELOGI("start ProfilingGraphDescInfo.");
ProfilingGraphDescInfo(compute_graph_desc_info, logic_device_id);
ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id);
GELOGI("Report profiling data for GE end.");
#endif
}
@@ -581,6 +590,105 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetP
return module;
}

void ProfilingManager::UpdateSubscribeDeviceModuleMap(std::string prof_type,
uint32_t device_id,
uint64_t module) {
#ifdef DAVINCI_SUPPORT_PROFILING
if (prof_type == kProfModelSubscribe) {
if (subs_dev_module_.find(device_id) != subs_dev_module_.end()) {
subs_dev_module_[device_id].subscribe_count++;
} else {
DeviceSubsInfo dev_info;
dev_info.module = module;
dev_info.subscribe_count = 1;
subs_dev_module_[device_id] = dev_info;
}
} else if (prof_type == kProfModelUnsubscribe) {
if (subs_dev_module_.find(device_id) != subs_dev_module_.end()) {
if (subs_dev_module_[device_id].subscribe_count > 0) {
subs_dev_module_[device_id].subscribe_count--;
}
}
} else {
GELOGI("No need to update device_id module map.");
}
#endif
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfModelSubscribe(
uint64_t module, void *model) {
#ifdef DAVINCI_SUPPORT_PROFILING
std::lock_guard<std::mutex> lock(mutex_);
uint64_t model_load_mask = module & PROF_MODEL_LOAD_MASK;
if ((subscribe_count_ == 0) && (model_load_mask == PROF_MODEL_LOAD_MASK)) {
// register framework to profiling
int32_t result = Msprof::Engine::Init(GE_PROFILING_MODULE, &engine_);
if (result != SUCCESS) {
GELOGE(FAILED, "Register profiling engine failed.");
return FAILED;
}
GELOGI("Prof subscribe: model load profiling on.");
}
subscribe_count_++;

auto davinci_model = static_cast<DavinciModel *>(model);
int32_t device_num = 1;
uint32_t device[1];
device[0] = davinci_model->GetDeviceId();
rtError_t rt_ret = rtProfilerStart(module, device_num, device);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(FAILED, "Runtime profiler start failed.");
return FAILED;
}
UpdateSubscribeDeviceModuleMap(kProfModelSubscribe, device[0], module);

// Report profiling data
Status p_ret = davinci_model->ReportProfilingData(false);
if (p_ret != SUCCESS) {
GELOGE(p_ret, "Report profiling data failed.");
return p_ret;
}
#endif
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfModelUnsubscribe(
void *model) {
#ifdef DAVINCI_SUPPORT_PROFILING
std::lock_guard<std::mutex> lock(mutex_);
if (subscribe_count_ == 0) {
GELOGW("The profiler has not been subscribed, so there is no need to cancel the subscription.");
return SUCCESS;
}

auto davinci_model = static_cast<DavinciModel *>(model);
int32_t dev_num = 1;
uint32_t device[1];
device[0] = davinci_model->GetDeviceId();
auto iter = subs_dev_module_.find(device[0]);
if (iter != subs_dev_module_.end()) {
if (subs_dev_module_[device[0]].subscribe_count == 1) {
rtError_t rt_ret = rtProfilerStop(subs_dev_module_[device[0]].module, dev_num, device);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(FAILED, "Runtime profiler stop failed.");
return FAILED;
}
}
UpdateSubscribeDeviceModuleMap(kProfModelUnsubscribe, device[0], subs_dev_module_[device[0]].module);
}

subscribe_count_--;
if (subscribe_count_ == 0) {
int32_t ret = Msprof::Engine::UnInit(GE_PROFILING_MODULE);
if (ret != SUCCESS) {
GELOGE(ret, "Profiling plugin uninit failed, ret:%d", ret);
return ret;
}
}
#endif
return SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfInit(uint64_t module) {
#ifdef DAVINCI_SUPPORT_PROFILING
std::lock_guard<std::mutex> lock(mutex_);
@@ -748,6 +856,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
device_id_ptr[i] = static_cast<uint32_t>(device_list[i]);
}
GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num);

rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get());
if (rt_ret != RT_ERROR_NONE) {
GELOGE(FAILED, "Runtime profiler config proc failed.");
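The new subscribe/unsubscribe pair above amounts to per-device reference counting: the first subscriber on a device records the module mask and starts the runtime profiler, later subscribers only bump a count, and the profiler is stopped when the last one leaves (the real manager serializes all of this under mutex_). A minimal sketch of that bookkeeping with plain standard-library types; the struct and the start/stop decision are stand-ins for the GE and runtime calls:

```cpp
#include <cstdint>
#include <iostream>
#include <map>

// Stand-in for ge::DeviceSubsInfo: the profiling module mask plus a refcount.
struct DeviceSubsInfo {
  uint64_t module;
  uint32_t subscribe_count;
};

std::map<uint32_t, DeviceSubsInfo> subs_dev_module;  // keyed by device id

// Mirrors UpdateSubscribeDeviceModuleMap on subscribe: the first subscriber
// on a device records the module mask, later ones only bump the count.
void Subscribe(uint32_t device_id, uint64_t module) {
  auto it = subs_dev_module.find(device_id);
  if (it != subs_dev_module.end()) {
    it->second.subscribe_count++;
  } else {
    subs_dev_module[device_id] = {module, 1};
  }
}

// Returns true when the last subscriber left, i.e. when the caller should
// actually stop the runtime profiler for this device (rtProfilerStop in GE).
bool Unsubscribe(uint32_t device_id) {
  auto it = subs_dev_module.find(device_id);
  if (it == subs_dev_module.end()) {
    return false;  // nothing was subscribed on this device
  }
  if (it->second.subscribe_count > 0) {
    it->second.subscribe_count--;
  }
  return it->second.subscribe_count == 0;
}

int main() {
  Subscribe(0, 0x1);                    // model A on device 0
  Subscribe(0, 0x1);                    // model B on the same device
  std::cout << Unsubscribe(0) << "\n";  // 0: device still in use
  std::cout << Unsubscribe(0) << "\n";  // 1: last one out, stop profiler
}
```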


ge/common/profiling/profiling_manager.h (+16 -4)

@@ -39,6 +39,10 @@ namespace {
const std::string GE_PROFILING_MODULE = "Framework";
} // namespace
namespace ge {
struct DeviceSubsInfo {
uint64_t module;
uint32_t subscribe_count;
};
// register Plugin
class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY PluginImpl : public Msprof::Engine::PluginIntf {
public:
@@ -73,6 +77,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
ge::Status InitFromOptions(const Options &options);
ge::Status InitFromAclCfg(const std::string &config);
ge::Status StartProfiling(int32_t iter, int32_t device_id);
void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module);
ge::Status ProfModelSubscribe(uint64_t module, void *model);
ge::Status ProfModelUnsubscribe(void *model);
ge::Status ProfInit(uint64_t module);
ge::Status ProfFinalize();
ge::Status ProfStartProfiling(uint64_t module, const std::map<std::string, std::string> &config_para);
@@ -84,13 +91,16 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
bool ProfilingModelLoadOn() const { return is_load_profiling_; }
bool ProfilingModelExecuteOn() const;
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern
bool IsAclApiMode() const { return is_acl_api_mode_; }
int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; }
void ReportProfilingData(const std::vector<TaskDescInfo> &task_desc_info,
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info);
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
bool check_device);
void Report(const int32_t &device_id, const string &data, Msprof::Engine::Reporter &reporter,
Msprof::Engine::ReporterData &reporter_data);
void ProfilingTaskDescInfo(const std::vector<TaskDescInfo> &task_desc_info, const int32_t &device_id);
void ProfilingGraphDescInfo(const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const int32_t &device_id);
void ProfilingGraphDescInfo(uint32_t model_id, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info,
const int32_t &device_id);
void SetProfilingConfig(const string &profiling_cfg);
vector<int32_t> GetProfilingDeviceId() const { return device_id_; }
@@ -122,6 +132,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
string task_trace_conf_;
const ProfilingEngineImpl engine_;
map<int32_t, uint64_t> device_id_module_map_; // key: device_id, value: profiling on module
map<uint32_t, DeviceSubsInfo> subs_dev_module_; // key: device_id, value: profiling on module
uint32_t subscribe_count_;
std::mutex mutex_;
};
} // namespace ge


ge/common/types.cc (+1 -0)

@@ -54,6 +54,7 @@ const std::map<std::string, std::string> PROFILE_COMPONENT_MAP{
{"runtime", RTS_PROFILE},
};
const std::string PROFILE_CONFIG = "config";
const std::string PROFILE_MODEL_ID = "modelId";

REGISTER_OPTYPE_DEFINE(DATA, "Data");
REGISTER_OPTYPE_DEFINE(AIPPDATA, "AippData");


ge/executor/ge_executor.cc (+13 -0)

@@ -1062,6 +1062,19 @@ Status GeExecutor::ReleaseSingleOpResource(void *stream) {
return SingleOpManager::GetInstance().ReleaseResource(stream);
}

Status GeExecutor::GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id) {
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
auto davinci_model = model_manager->GetModel(model_id);
if (davinci_model == nullptr) {
GELOGE(FAILED, "Model id: %u is invalid or model is not loaded.", model_id);
return FAILED;
}

device_id = davinci_model->GetDeviceId();
return SUCCESS;
}

Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) {
std::vector<std::vector<int64_t>> batch_info;
int32_t dynamic_type = static_cast<int32_t>(FIXED);
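The new GeExecutor::GetDeviceIdByModelId gives callers a way to map a loaded model back to its device, which the profiling subscribe path needs. A hedged usage sketch; the helper name and the calling context are illustrative, only the signature comes from this diff:

```cpp
#include "framework/executor/ge_executor.h"

// Hypothetical caller: resolve which device a loaded model lives on, for
// example before issuing a per-device profiling subscribe for it.
bool ResolveDevice(ge::GeExecutor &executor, uint32_t model_id, uint32_t &device_id) {
  // Fails when model_id is unknown or the model is not loaded.
  return executor.GetDeviceIdByModelId(model_id, device_id) == ge::SUCCESS;
}
```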


ge/graph/build/label_allocator.cc (+21 -10)

@@ -32,11 +32,6 @@ Status LabelAllocator::AssignFunctionalLabels() {
return INTERNAL_ERROR;
}

if (compute_graph_->GetGraphUnknownFlag()) {
GELOGD("Graph[%s] is unknown graph, skip label allocator.", compute_graph_->GetName().c_str());
return SUCCESS;
}

// Add label task for sub graph.
GELOGI("AssignFunctionalLabels start: %s.", compute_graph_->GetName().c_str());
std::set<NodePtr> functional_nodes;
@@ -62,7 +57,7 @@ Status LabelAllocator::AssignFunctionalLabels() {
}

(void)AttrUtils::SetInt(*compute_graph_, ATTR_MODEL_LABEL_NUM, label_index);
GELOGI("AssignFunctionalLabels success.");
GELOGI("AssignFunctionalLabels success, Num: %u.", label_index);
return SUCCESS;
}

@@ -72,13 +67,29 @@ bool LabelAllocator::CollectFunctionalNode(ComputeGraphPtr &graph, std::set<Node
return false;
}

NodePtr parent = graph->GetParentNode();
if (parent == nullptr) {
GELOGE(INTERNAL_ERROR, "ComputeGraph owner not set: %s.", graph->GetName().c_str());
if (graph->GetGraphUnknownFlag()) {
GELOGD("Graph[%s] is unknown graph, skip label allocator.", graph->GetName().c_str());
return true;
}

NodePtr func_node = graph->GetParentNode();
if (func_node == nullptr) {
GELOGE(INTERNAL_ERROR, "Parent functional node not set: %s.", graph->GetName().c_str());
return false;
}

(void)functional_nodes.insert(parent); // unique functional node.
ComputeGraphPtr owner_graph = func_node->GetOwnerComputeGraph();
if (owner_graph == nullptr) {
GELOGE(INTERNAL_ERROR, "ComputeGraph owner not set: %s.", func_node->GetName().c_str());
return false;
}

if (owner_graph->GetGraphUnknownFlag()) {
GELOGD("Graph[%s] is unknown graph, skip label allocator.", owner_graph->GetName().c_str());
return true;
}

(void)functional_nodes.insert(func_node); // unique functional node.
return true;
}
} // namespace ge
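The rework moves the unknown-graph check from the root graph down into CollectFunctionalNode, so an unknown subgraph (or one owned by an unknown graph) is skipped without failing the pass. A toy version of that control flow, with trimmed stand-ins for ComputeGraph and Node:

```cpp
#include <set>
#include <string>

struct Node;  // forward declaration: a functional node (If/Case/While)

// Trimmed stand-ins for ComputeGraph and Node, just the fields the check uses.
struct Graph {
  std::string name;
  bool unknown_flag;   // GetGraphUnknownFlag()
  Node *parent_node;   // GetParentNode()
};
struct Node {
  Graph *owner_graph;  // GetOwnerComputeGraph()
};

// Mirrors the reworked CollectFunctionalNode: unknown graphs (and graphs
// owned by unknown graphs) are skipped but still count as success; only a
// missing parent or owner is an error.
bool Collect(Graph *graph, std::set<Node *> &functional_nodes) {
  if (graph->unknown_flag) return true;    // skip, not a failure
  Node *func_node = graph->parent_node;
  if (func_node == nullptr) return false;  // parent functional node not set
  Graph *owner = func_node->owner_graph;
  if (owner == nullptr) return false;      // owner graph not set
  if (owner->unknown_flag) return true;    // skip, not a failure
  functional_nodes.insert(func_node);      // set keeps it unique
  return true;
}

int main() {
  Graph unknown{"sub", true, nullptr};
  std::set<Node *> nodes;
  return Collect(&unknown, nodes) ? 0 : 1;  // 0: skipped successfully
}
```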

ge/graph/build/memory/block_mem_assigner.cc (+9 -0)

@@ -880,6 +880,15 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
GELOGI("Unreusable block.");
continue;
}
std::string batch_label;
if (reusable_block->IsSameLabel(batch_label)) {
std::string op_label;
(void)ge::AttrUtils::GetStr(node_op_desc, ATTR_NAME_BATCH_LABEL, op_label);
if (batch_label != op_label) {
GELOGI("label diff, op name %s", node_op_desc->GetName().c_str());
continue;
}
}

// A node can reuse blocks of the same stream and preorder streams
if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) {
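The added guard keeps a memory block tagged with one batch label from being reused by an op carrying a different label. Reduced to a predicate, a sketch with labels as plain strings and IsSameLabel folded into an emptiness check:

```cpp
#include <iostream>
#include <string>

// Sketch of the new reuse gate: a block that belongs to one batch branch
// (non-empty batch label) must not be reused by an op from another branch,
// otherwise the branches of a multi-batch graph would alias memory.
bool LabelsAllowReuse(const std::string &block_label, const std::string &op_label) {
  if (block_label.empty()) return true;  // unlabeled block: no restriction
  return block_label == op_label;
}

int main() {
  std::cout << LabelsAllowReuse("", "Batch_0") << "\n";         // 1
  std::cout << LabelsAllowReuse("Batch_0", "Batch_1") << "\n";  // 0
}
```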


ge/graph/build/model_builder.cc (+10 -2)

@@ -416,6 +416,14 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) {
return FAILED);
GELOGI("For model, max_mem_offset_: %zu, p2p_mem_size: %zu, zero_copy_mem_size_: %zu", max_mem_offset_,
p2p_mem_offset_, zero_copy_mem_size_);
string fp_ceiling_mode;
if (ge::GetContext().GetOption("ge.fpCeilingMode", fp_ceiling_mode) == SUCCESS) {
if (!ge::AttrUtils::SetStr(&model, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) {
GELOGE(FAILED, "Failed to set attr ATTR_FP_CEILING_MODE");
return FAILED;
}
GELOGI("Set attr ATTR_FP_CEILING_MODE to model, value is %s.", fp_ceiling_mode.c_str());
}

string ge_core_type;
Status ret = ge::GetContext().GetOption(kCoreType, ge_core_type);
@@ -690,8 +698,8 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) {
GE_TIMESTAMP_END(AssignLogicalStreams, "GraphBuilder::AssignLogicalStreams");

// Assign functional op labels.
label_num_ = 0;
(void)AttrUtils::GetInt(*compute_graph_, ATTR_MODEL_LABEL_NUM, label_num_);
auto root_graph = GraphUtils::FindRootGraph(compute_graph_);
(void)AttrUtils::GetInt(*root_graph, ATTR_MODEL_LABEL_NUM, label_num_);

GE_TIMESTAMP_START(AssignMemory);
MemoryAssigner mem_assigner(compute_graph_);


ge/graph/common/transop_util.cc (+9 -0)

@@ -82,4 +82,13 @@ bool TransOpUtil::CheckPrecisionLoss(const ge::NodePtr &src_node) {
}
return true;
}

std::string TransOpUtil::TransopMapToString() {
std::string buffer;
for (auto &key : Instance().transop_index_map_) {
buffer += key.first + " ";
}
return buffer;
}

} // namespace ge
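TransopMapToString only joins the supported trans-op type names so graph_preprocess.cc can embed them in the E19025 error text below. The same join in standalone form; the key set here is an illustrative subset, not the real transop_index_map_:

```cpp
#include <iostream>
#include <map>
#include <string>

int main() {
  // Illustrative subset only; the real keys live in TransOpUtil's map.
  std::map<std::string, int> transop_index_map = {
      {"Cast", 0}, {"Reshape", 0}, {"TransData", 0}, {"Transpose", 0}};
  std::string buffer;
  for (const auto &kv : transop_index_map) {
    buffer += kv.first + " ";
  }
  // Yields the "it must be ..." suffix used in the error report.
  std::cout << "it must be " << buffer << "\n";
}
```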

ge/graph/common/transop_util.h (+2 -0)

@@ -35,6 +35,8 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil {

static bool CheckPrecisionLoss(const NodePtr &src_node);

static std::string TransopMapToString();

private:
TransOpUtil();



ge/graph/load/new_model_manager/data_dumper.h (+1 -0)

@@ -86,6 +86,7 @@ class DataDumper {
void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; }
const DumpProperties &GetDumpProperties() const { return dump_properties_; }
bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const;
const std::vector<OpDescInfo> &GetAllOpDescInfo() const { return op_desc_info_; }

// Dump exception info
Status DumpExceptionInput(const OpDescInfo &op_desc_info, const string &dump_file);


ge/graph/load/new_model_manager/davinci_model.cc (+70 -87)

@@ -88,6 +88,7 @@ const uint32_t kDataMemAlignSizeCompare = 64;
const uint32_t kDumpL1FusionOpMByteSize = 2 * 1024 * 1024;
const uint32_t kDumpFlagOfL1Fusion = 0;
const char *const kDefaultBatchLable = "Batch_default";
const int32_t kInvalidStream = -1;

inline bool IsDataOp(const std::string &node_type) {
return node_type == DATA_TYPE || node_type == AIPP_DATA_TYPE || node_type == ANN_DATA_TYPE;
@@ -258,7 +259,6 @@ Status DavinciModel::Assign(const GeModelPtr &ge_model) {
///
void DavinciModel::Shrink() {
ge_model_.reset(); // delete object.
op_list_.clear();
}

Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
@@ -611,7 +611,9 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size

GE_DISMISS_GUARD(stream);
stream_list_.push_back(stream);
GELOGD("Stream index:%u, stream:%p.", i, stream);
int32_t rt_stream_id = kInvalidStream;
(void)rtGetStreamId(stream, &rt_stream_id);
GELOGI("Logical stream index:%u, stream:%p, rtstream: %d.", i, stream, rt_stream_id);
}

for (uint32_t i = 0; i < EventNum(); i++) {
@@ -653,18 +655,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
GE_IF_BOOL_EXEC(IsBroadCastOpData(node),
(void)ge::AttrUtils::SetStr(op_desc, VAR_ATTR_VAR_IS_BROADCAST, "var_is_restore"););
}
// for profiling
op_name_map_ = compute_graph->GetGraphOpName();

vector<string> op_name;
GE_IF_BOOL_EXEC(ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_TASK_INDEX_OP_NAME, op_name),
GELOGI("get str of task_index_op_name"));
if (op_name_map_.empty()) {
for (size_t idx = 0; idx < op_name.size(); idx++) {
op_name_map_[idx] = op_name[idx];
}
GELOGI("Infer profiling: op_name_size(%zu)", op_name.size());
}

GE_CHK_STATUS_RET(InitNodes(compute_graph), "Init nodes failed");

@@ -676,7 +666,9 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
auto all_dump_model = GetDumpProperties().GetAllDumpModel();
bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end();
bool findByModelName = all_dump_model.find(name_) != all_dump_model.end();
if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) {
bool dump_l1fusion_op = (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) ||
findByOmName || findByModelName;
if (dump_l1fusion_op) {
// malloc 2M for dump l1fusion op
GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR));

@@ -690,16 +682,21 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
need_destroy_aicpu_kernel_ = IsAicpuKernelConnectSpecifiedLayer();
(void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_);

string fp_ceiling_mode;
if (ge::AttrUtils::GetStr(ge_model_, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) {
GELOGI("Get attr ATTR_FP_CEILING_MODE from model, value is %s.", fp_ceiling_mode.c_str());
// mode 0: Do not perform saturation processing. By default, IEEE754 is used.
GE_CHK_RT_RET(rtSetCtxINFMode((fp_ceiling_mode != "0")));
}

// collect profiling for ge
if (ProfilingManager::Instance().ProfilingModelLoadOn()) {
std::vector<ComputeGraphDescInfo> compute_graph_desc_info;
Status ret1 = GetComputeGraphInfo(compute_graph, compute_graph_desc_info);
if (ret1 != SUCCESS) {
GELOGE(ret1, "GetComputeGraphInfo failed.");
return ret1;
auto &profiling_manager = ProfilingManager::Instance();
if (profiling_manager.ProfilingModelLoadOn()) {
Status p_ret = ReportProfilingData(!profiling_manager.IsAclApiMode());
if (p_ret != SUCCESS) {
GELOGE(p_ret, "Report profiling data failed.");
return p_ret;
}
ProfilingManager::Instance().ReportProfilingData(GetTaskDescInfo(), compute_graph_desc_info);
GE_CHK_STATUS(SinkModelProfile(), "Sink model profile failed.");
}

Shrink();
@@ -707,6 +704,20 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
return ret;
}

Status DavinciModel::ReportProfilingData(bool check_device) {
std::vector<ComputeGraphDescInfo> compute_graph_desc_info;
Status ret = GetComputeGraphInfo(compute_graph_desc_info);
if (ret != SUCCESS) {
GELOGE(ret, "GetComputeGraphInfo failed.");
return ret;
}
ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info, check_device);
GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed.");
op_list_.clear();

return SUCCESS;
}

///
/// @ingroup ge
/// @brief Travel all nodes and determine if destruction is required.
@@ -2900,34 +2911,25 @@ Status DavinciModel::DistributeTask() {
SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs());
}
}
// get op_name by task_index
if (task->GetCtx() != nullptr) {
auto iter = op_name_map_.find(task_index);
if (iter == op_name_map_.end()) {
continue;
}

// else task index is found in op_name_map_
TaskDescInfo task_desc_info;
string op_name = op_name_map_[task_index];
if (!om_name_.empty()) {
task_desc_info.model_name = om_name_;
} else {
task_desc_info.model_name = name_;
}
task_desc_info.op_name = op_name;
task_desc_info.block_dim = model_task_def->task(task_index).kernel().block_dim();
task_desc_info.task_id = task->GetTaskID();
task_desc_info.stream_id = task->GetStreamId();
task_desc_info_.emplace_back(task_desc_info);
if (flag) {
if (task->GetSktTaskID() != 0xFFFFFFFF) {
TaskDescInfo task_desc_info;
string op_name = "super_kernel_" + to_string(task_index);
task_desc_info.op_name = op_name;
task_desc_info.task_id = task->GetSktTaskID();
task_desc_info_.emplace_back(task_desc_info);
}
// Load task info for profiling
TaskDescInfo task_desc_info;
if (!om_name_.empty()) {
task_desc_info.model_name = om_name_;
} else {
task_desc_info.model_name = name_;
}
task_desc_info.op_name = op->GetName();
task_desc_info.block_dim = model_task_def->task(task_index).kernel().block_dim();
task_desc_info.task_id = task->GetTaskID();
task_desc_info.stream_id = task->GetStreamId();
task_desc_info_.emplace_back(task_desc_info);
if (flag) {
if (task->GetSktTaskID() != 0xFFFFFFFF) {
TaskDescInfo task_desc_info;
string op_name = "super_kernel_" + to_string(task_index);
task_desc_info.op_name = op_name;
task_desc_info.task_id = task->GetSktTaskID();
task_desc_info_.emplace_back(task_desc_info);
}
}
}
@@ -3817,50 +3819,31 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea
main_follow_stream_mapping_[main_stream_id].emplace_back(stream);
}

Status DavinciModel::GetComputeGraphInfo(const ComputeGraphPtr &graph, vector<ComputeGraphDescInfo> &graph_desc_info) {
Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) {
GELOGI("GetComputeGraphInfo start.");
for (auto &node : graph->GetAllNodes()) {
auto &all_op_desc = data_dumper_.GetAllOpDescInfo();
for (auto &op_desc : all_op_desc) {
ComputeGraphDescInfo compute_graph_info;
auto op_desc = node->GetOpDesc();
if (op_desc == nullptr) {
GELOGE(PARAM_INVALID, "op_desc is nullptr.");
return PARAM_INVALID;
if (!om_name_.empty()) {
compute_graph_info.model_name = om_name_;
} else {
compute_graph_info.model_name = name_;
}
compute_graph_info.op_name = op_desc.op_name;
compute_graph_info.op_type = op_desc.op_type;
compute_graph_info.input_format = op_desc.input_format;
compute_graph_info.input_shape = op_desc.input_shape;
compute_graph_info.input_data_type = op_desc.input_data_type;
compute_graph_info.output_format = op_desc.output_format;
compute_graph_info.output_shape = op_desc.output_shape;
compute_graph_info.output_data_type = op_desc.output_data_type;

auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID);
if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) &&
op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) {
if (!om_name_.empty()) {
compute_graph_info.model_name = om_name_;
} else {
compute_graph_info.model_name = name_;
}
compute_graph_info.op_name = op_desc->GetName();
compute_graph_info.op_type = op_desc->GetType();

for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i);
if (input_desc == nullptr) {
continue;
}
compute_graph_info.input_format.emplace_back(input_desc->GetFormat());
compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims());
compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType());
}

for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) {
GeTensorDesc output_desc = op_desc->GetOutputDesc(j);
compute_graph_info.output_format.emplace_back(output_desc.GetFormat());
compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
}

graph_desc_info.emplace_back(compute_graph_info);
}
graph_desc_info.emplace_back(compute_graph_info);
}
GELOGI("GetComputeGraphInfo end.");
return SUCCESS;
}

void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size) {
if (tensor_name_to_fixed_addr_size_.find(tensor_name) == tensor_name_to_fixed_addr_size_.end()) {
tensor_name_to_fixed_addr_size_[tensor_name] = total_fixed_addr_size_;
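The GetComputeGraphInfo rewrite stops re-walking the ComputeGraph and instead copies the op descriptions the data dumper already recorded (via the new GetAllOpDescInfo accessor). A reduced sketch of that copy, with the two profiling structs trimmed to a few representative fields:

```cpp
#include <iostream>
#include <string>
#include <vector>

// Trimmed stand-ins for the profiling structs named in the diff.
struct OpDescInfo {
  std::string op_name, op_type;
  std::vector<int> input_format, output_format;  // formats reduced to ints
};
struct ComputeGraphDescInfo {
  std::string model_name, op_name, op_type;
  std::vector<int> input_format, output_format;
};

// Mirrors the new GetComputeGraphInfo: one pass over the op descriptions the
// data dumper already saved, instead of traversing the ComputeGraph again.
std::vector<ComputeGraphDescInfo> Collect(const std::vector<OpDescInfo> &all_op_desc,
                                          const std::string &model_name) {
  std::vector<ComputeGraphDescInfo> out;
  for (const auto &op : all_op_desc) {
    ComputeGraphDescInfo info;
    info.model_name = model_name;  // om_name_ when set, else name_
    info.op_name = op.op_name;
    info.op_type = op.op_type;
    info.input_format = op.input_format;
    info.output_format = op.output_format;
    out.emplace_back(info);
  }
  return out;
}

int main() {
  std::vector<OpDescInfo> ops = {{"conv1", "Conv2D", {0}, {1}}};
  auto infos = Collect(ops, "model.om");
  std::cout << infos.size() << " " << infos[0].op_name << "\n";  // 1 conv1
}
```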


ge/graph/load/new_model_manager/davinci_model.h (+3 -2)

@@ -439,6 +439,8 @@ class DavinciModel {

Status SinkTimeProfile(const InputData &current_data);

Status ReportProfilingData(bool check_device = true);

void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) {
data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id);
}
@@ -830,7 +832,7 @@ class DavinciModel {
Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id);

// get desc info of graph for profiling
Status GetComputeGraphInfo(const ComputeGraphPtr &graph, vector<ComputeGraphDescInfo> &graph_desc_info);
Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info);

void SetDataDumperArgs(const ComputeGraphPtr &compute_graph);

@@ -949,7 +951,6 @@ class DavinciModel {
std::map<std::string, uint32_t> used_tbe_handle_map_;

// for profiling task and graph info
std::map<uint32_t, std::string> op_name_map_;
std::vector<TaskDescInfo> task_desc_info_;

int64_t maxDumpOpNum_;


ge/graph/load/new_model_manager/model_manager.cc (+82 -11)

@@ -43,6 +43,8 @@ const std::string kCmdTypeProfInit = "prof_init";
const std::string kCmdTypeProfFinalize = "prof_finalize";
const std::string kCmdTypeProfStart = "prof_start";
const std::string kCmdTypeProfStop = "prof_stop";
const std::string kCmdTypeProfModelSubscribe = "prof_model_subscribe";
const std::string kCmdTypeProfModelUnsubscribe = "prof_model_cancel_subscribe";
const char *const kBatchLoadBuf = "batchLoadsoFrombuf";
const char *const kDeleteCustOp = "deleteCustOp";
struct CustAicpuSoBuf {
@@ -334,11 +336,9 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge

GELOGI("Parse model %u success.", model_id);

if (ProfilingManager::Instance().ProfilingModelLoadOn()) {
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond
davinci_model->SetProfileTime(MODEL_LOAD_END);
}
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond
davinci_model->SetProfileTime(MODEL_LOAD_END);
} while (0);

GE_CHK_RT(rtDeviceReset(static_cast<int32_t>(GetContext().DeviceId())));
@@ -565,7 +565,9 @@ Status ModelManager::HandleCommand(const Command &command) {
{kCmdTypeProfile, HandleProfileCommand}, {kCmdTypeDump, HandleDumpCommand},
{kCmdTypeProfiling, HandleAclProfilingCommand}, {kCmdTypeProfInit, HandleProfInitCommand},
{kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand},
{kCmdTypeProfStop, HandleProfStopCommand}};
{kCmdTypeProfStop, HandleProfStopCommand},
{kCmdTypeProfModelSubscribe, HandleProfModelSubscribeCommand},
{kCmdTypeProfModelUnsubscribe, HandleProfModelUnsubscribeCommand}};

auto iter = cmds.find(command.cmd_type);
if (iter == cmds.end()) {
@@ -591,6 +593,77 @@ Status ModelManager::HandleAclProfilingCommand(const Command &command) {
return SUCCESS;
}

Status ModelManager::GetModelByCmd(const Command &command,
std::shared_ptr<DavinciModel> &davinci_model) {
if (command.cmd_params.size() < kCmdParSize) {
GELOGE(PARAM_INVALID, "When the cmd_type is '%s', the size of cmd_params must be larger than 2.",
command.cmd_type.c_str());
return PARAM_INVALID;
}

std::string map_key = command.cmd_params[0];
std::string value = command.cmd_params[1];
if (map_key == PROFILE_MODEL_ID) {
int32_t model_id = 0;
try {
model_id = std::stoi(value);
} catch (std::invalid_argument &) {
GELOGE(PARAM_INVALID, "Model id: %s is invalid.", value.c_str());
return PARAM_INVALID;
} catch (std::out_of_range &) {
GELOGE(PARAM_INVALID, "Model id: %s is out of range.", value.c_str());
return PARAM_INVALID;
} catch (...) {
GELOGE(FAILED, "Model id: %s cannot change to int.", value.c_str());
return FAILED;
}

auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
davinci_model = model_manager->GetModel(static_cast<uint32_t>(model_id));
if (davinci_model == nullptr) {
GELOGE(FAILED, "Model id: %d is invalid or model is not loaded.", model_id);
return FAILED;
}
} else {
GELOGE(FAILED, "The model_id parameter is not found in the command.");
return FAILED;
}

return SUCCESS;
}

Status ModelManager::HandleProfModelSubscribeCommand(const Command &command) {
std::shared_ptr<DavinciModel> davinci_model = nullptr;
Status ret = GetModelByCmd(command, davinci_model);
if (ret != SUCCESS) {
return ret;
}

if (ProfilingManager::Instance().ProfModelSubscribe(command.module_index,
static_cast<void *>(davinci_model.get())) != SUCCESS) {
GELOGE(FAILED, "Handle prof model subscribe failed.");
return FAILED;
}

return SUCCESS;
}

Status ModelManager::HandleProfModelUnsubscribeCommand(const Command &command) {
std::shared_ptr<DavinciModel> davinci_model = nullptr;
Status ret = GetModelByCmd(command, davinci_model);
if (ret != SUCCESS) {
return ret;
}

if (ProfilingManager::Instance().ProfModelUnsubscribe(static_cast<void *>(davinci_model.get())) != SUCCESS) {
GELOGE(FAILED, "Handle prof model unsubscribe failed.");
return FAILED;
}

return SUCCESS;
}

Status ModelManager::HandleProfInitCommand(const Command &command) {
uint64_t module_index = command.module_index;
if (ProfilingManager::Instance().ProfInit(module_index) != SUCCESS) {
@@ -973,11 +1046,9 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model

GELOGI("Parse model %u success.", model_id);

if (ProfilingManager::Instance().ProfilingModelLoadOn()) {
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond
davinci_model->SetProfileTime(MODEL_LOAD_END);
}
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 +
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond
davinci_model->SetProfileTime(MODEL_LOAD_END);

GE_IF_BOOL_EXEC(ret == SUCCESS, device_count++);
return SUCCESS;
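GetModelByCmd's numeric step is the standard std::stoi guard: catch invalid_argument and out_of_range instead of letting the exception escape the command handler. Standalone it looks roughly like this; the command layout (key at cmd_params[0], value at cmd_params[1]) is as in the diff:

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>

// Parse a model id the way GetModelByCmd does: reject non-numeric and
// out-of-range values rather than letting std::stoi throw upward.
bool ParseModelId(const std::string &value, uint32_t &model_id) {
  try {
    model_id = static_cast<uint32_t>(std::stoi(value));
  } catch (const std::invalid_argument &) {
    std::cerr << "Model id: " << value << " is invalid.\n";
    return false;
  } catch (const std::out_of_range &) {
    std::cerr << "Model id: " << value << " is out of range.\n";
    return false;
  }
  return true;
}

int main() {
  uint32_t id = 0;
  std::cout << ParseModelId("42", id) << " " << id << "\n";  // 1 42
  std::cout << ParseModelId("abc", id) << "\n";              // 0
}
```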


ge/graph/load/new_model_manager/model_manager.h (+5 -0)

@@ -158,10 +158,15 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
static ge::Status HandleAclProfilingCommand(const Command &command);
static ge::Status HandleProfileCommand(const Command &command);
static ge::Status HandleDumpCommand(const Command &command);
static ge::Status HandleProfModelSubscribeCommand(const Command &command);
static ge::Status HandleProfModelUnsubscribeCommand(const Command &command);
static ge::Status HandleProfInitCommand(const Command &command);
static ge::Status HandleProfFinalizeCommand(const Command &command);
static ge::Status HandleProfStartCommand(const Command &command);
static ge::Status HandleProfStopCommand(const Command &command);

static ge::Status GetModelByCmd(const Command &command,
std::shared_ptr<DavinciModel> &davinci_model);
///
/// @ingroup domi_ome
/// @brief get model memory usage


ge/graph/load/new_model_manager/zero_copy_task.cc (+1 -1)

@@ -45,7 +45,7 @@ Status ZeroCopyTask::SetTaskArgsOffset(uintptr_t addr, size_t offset) {
if (it == task_addr_offset_.end()) {
task_addr_offset_[addr] = {offset};
} else {
it->second.push_back(offset);
it->second.insert(offset);
}

GELOGI("[ZCPY] %s set task, virtual_addr: 0x%lx, args_addr: %p, size: %zu, offset: %zu", name_.c_str(), addr,


ge/graph/load/new_model_manager/zero_copy_task.h (+1 -1)

@@ -103,7 +103,7 @@ class ZeroCopyTask {
bool is_updated_;
string batch_label_;
// <address from Op, {offset in args}>
map<uintptr_t, vector<size_t>> task_addr_offset_;
map<uintptr_t, set<size_t>> task_addr_offset_;
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_TASK_H_
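Switching task_addr_offset_ from vector&lt;size_t&gt; to set&lt;size_t&gt; makes repeated registration of the same args offset a no-op, so one args slot is never recorded (and later patched) twice for the same address. A minimal before/after sketch:

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <set>

int main() {
  // New layout: <address from Op, {unique offsets in args}>.
  std::map<uintptr_t, std::set<size_t>> task_addr_offset;
  uintptr_t addr = 0x1000;
  task_addr_offset[addr].insert(64);
  task_addr_offset[addr].insert(64);   // duplicate registration is a no-op
  task_addr_offset[addr].insert(128);
  // With the old vector<size_t>, offset 64 would be stored (and later
  // patched) twice; the set keeps each args slot exactly once.
  std::cout << task_addr_offset[addr].size() << "\n";  // 2
}
```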

ge/graph/manager/graph_manager.cc (+22 -0)

@@ -133,6 +133,22 @@ bool IsTailingOptimization() {
GELOGW("OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION not set, use BFSTopologicalSorting by default.");
return false;
}

ge::Status CheckFpCeilingMode() {
static const std::unordered_set<std::string> kValidFpCeilingMode = {"0", "1", "2"};
string mode;
auto ret = ge::GetContext().GetOption("ge.fpCeilingMode", mode);
if (ret == ge::GRAPH_SUCCESS) {
if (kValidFpCeilingMode.count(mode) == 0) {
GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "The fp_ceiling_mode %s is invalid, options are 0, 1, and 2.", mode.c_str());
return ge::GE_GRAPH_OPTIONS_INVALID;
}
GELOGI("The parameter fp_ceiling_mode is set to %s.", mode.c_str());
return ge::SUCCESS;
}
GELOGW("The parameter fp_ceiling_mode is not set.");
return ge::SUCCESS;
}
} // namespace

namespace ge {
@@ -168,6 +184,12 @@ Status GraphManager::Initialize(const std::map<string, string> &options) {
return ret;
}

ret = CheckFpCeilingMode();
if (ret != SUCCESS) {
GELOGE(ret, "[Initialize] Check fp-ceiling-mode options failed.");
return ret;
}

ret = graph_context_->Initialize(options);
if (ret != SUCCESS) {
GELOGE(ret, "[Initialize] GraphContext initialize failed.");
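Taken together with the model_builder.cc and davinci_model.cc hunks, ge.fpCeilingMode is validated at GraphManager init, stamped onto the model as ATTR_FP_CEILING_MODE at build time, and read back at model load to choose the rounding context (mode "0" keeps the IEEE754 default, per the comment in davinci_model.cc). A compressed sketch of that pipeline, with the rtSetCtxINFMode call replaced by a bool:

```cpp
#include <iostream>
#include <string>
#include <unordered_set>

// Build-time check, as CheckFpCeilingMode does at GraphManager::Initialize.
bool ValidFpCeilingMode(const std::string &mode) {
  static const std::unordered_set<std::string> kValid = {"0", "1", "2"};
  return kValid.count(mode) != 0;
}

// Load-time decision (stand-in for rtSetCtxINFMode in DavinciModel::Init):
// mode "0" keeps the default IEEE754 behaviour, anything else switches.
bool InfModeFor(const std::string &mode) { return mode != "0"; }

int main() {
  for (const std::string mode : {"0", "2", "3"}) {
    std::cout << mode << ": valid=" << ValidFpCeilingMode(mode)
              << " inf_mode=" << InfModeFor(mode) << "\n";
  }
}
```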


ge/graph/passes/memcpy_addr_async_pass.cc (+6 -1)

@@ -25,6 +25,10 @@
namespace ge {
Status MemcpyAddrAsyncPass::Run(ComputeGraphPtr graph) {
GE_CHECK_NOTNULL(graph);
if (graph->GetGraphUnknownFlag()) {
GELOGD("Graph[%s] is unknown graph, skip.", graph->GetName().c_str());
return SUCCESS;
}

int64_t value = 0;
rtError_t rt_ret = rtGetRtCapability(FEATURE_TYPE_MEMCPY, MEMCPY_INFO_SUPPORT_ZEROCOPY, &value);
@@ -201,9 +205,10 @@ NodePtr MemcpyAddrAsyncPass::CreateMemcpyAddrAsyncNode(const ComputeGraphPtr &gr
const OutDataAnchorPtr &out_data_anchor,
const NodePtr &out_of_user_data) {
GELOGD("Start CreateMemcpyAddrAsyncNode.");
static uint32_t new_node_index = 0;
OpDescPtr pre_op_desc = out_data_anchor->GetOwnerNode()->GetOpDesc();
GE_CHK_BOOL_EXEC(pre_op_desc != nullptr, return nullptr, "Op_desc of pre node is invalid.");
std::string node_name = pre_op_desc->GetName() + "_" + MEMCPYADDRASYNC;
std::string node_name = pre_op_desc->GetName() + "_" + MEMCPYADDRASYNC + "_" + std::to_string(new_node_index++);

OpDescPtr op_desc = MakeShared<OpDesc>(node_name, MEMCPYADDRASYNC);
GE_CHECK_NOTNULL_EXEC(op_desc, return nullptr);
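The static counter suffix avoids name collisions when several MemcpyAddrAsync nodes are created from predecessors with the same name. A sketch of the naming scheme; "MemcpyAddrAsync" is assumed to be the value of the MEMCPYADDRASYNC constant, and like the diff the counter is a function-local static, not thread-safe:

```cpp
#include <cstdint>
#include <iostream>
#include <string>

// Mirrors CreateMemcpyAddrAsyncNode's naming: predecessor name + op type +
// a monotonically increasing index, so repeated insertions stay unique.
std::string NextMemcpyNodeName(const std::string &pre_name) {
  static uint32_t new_node_index = 0;  // function-local static, as in the diff
  return pre_name + "_MemcpyAddrAsync_" + std::to_string(new_node_index++);
}

int main() {
  std::cout << NextMemcpyNodeName("conv1") << "\n";  // conv1_MemcpyAddrAsync_0
  std::cout << NextMemcpyNodeName("conv1") << "\n";  // conv1_MemcpyAddrAsync_1
}
```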


ge/graph/passes/net_output_pass.cc (+7 -1)

@@ -103,6 +103,12 @@ Status NetOutputPass::GetOutputNode(const ge::ComputeGraphPtr &graph, std::vecto
GELOGI("user set out node [%s] is found in user def targets, out node is prio!", ele.first->GetName().c_str());
targets_.erase(iter);
}

auto op_desc = ele.first->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
if (op_desc->HasAttr(ATTR_ATC_USER_DEFINE_OUTPUT_NODES)) {
is_user_define_ouput_nodes = true;
}
output_nodes_info.push_back({ele.first, ele.second, -1});
}
GELOGI("Output node set by user or leaf node, size:%zu.", output_nodes_info.size());
@@ -414,7 +420,7 @@ Status NetOutputPass::ProcessWithNetoutput(const ge::ComputeGraphPtr &graph, con
Status NetOutputPass::AddCtrlEdgesBetweenLeafAndNetOutput(const ge::ComputeGraphPtr &graph,
const ge::NodePtr &net_out_node) {
GE_CHECK_NOTNULL(net_out_node);
if (!GetLocalOmgContext().user_out_nodes.empty()) {
if (!GetLocalOmgContext().user_out_nodes.empty() || is_user_define_ouput_nodes) {
GELOGI("No need to add ctrl edge to netoutput because user out nodes have been set.");
return SUCCESS;
}


ge/graph/passes/net_output_pass.h (+1 -0)

@@ -220,6 +220,7 @@ class NetOutputPass : public GraphPass {
bool is_include_special_node_ = false;
std::set<NodePtr> targets_;
friend class ReUpdateNetOutputPass;
bool is_user_define_ouput_nodes = false;
};
} // namespace ge
#endif // GE_GRAPH_PASSES_NET_OUTPUT_PASS_H_

ge/graph/preprocess/graph_preprocess.cc (+57 -6)

@@ -117,7 +117,6 @@
#include "graph/passes/variable_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h"
#include "graph/passes/variable_ref_delete_op_pass.h"
#include "graph/passes/mark_agnostic_pass.h"


namespace ge {
@@ -219,6 +218,9 @@ NodePtr CreateTransNode(const std::string &name, const std::string &node_type, c

auto index = TransOpUtil::GetTransOpDataIndex(node_type);
if (index < 0) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E19025", {"situation", "reason"},
{"The trans node type[" + node_type + "]", "it must be " + TransOpUtil::TransopMapToString()});
GELOGE(INTERNAL_ERROR, "The trans node type %s does not exist", node_type.c_str());
return nullptr;
}
@@ -387,6 +389,8 @@ Status RecoverTransRoadForVar(const NodePtr &var, const VarTransRoad &road) {
auto trans_name = var->GetName() + "_trans_" + std::to_string(index++);
auto ret = RecoverOneTransNodeForVar(trans_name, *iter, last_node, last_node);
if (ret != SUCCESS) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E15001", {"variable", "index", "type"}, {var->GetName(), std::to_string(index), iter->node_type});
GELOGE(INTERNAL_ERROR, "Failed to recover trans node for variable %s, index %d, type %s", var->GetName().c_str(),
index, iter->node_type.c_str());
return INTERNAL_ERROR;
@@ -419,6 +423,8 @@ Status RecoverTransRoadForVarRef(const std::set<NodePtr> &nodes, const VarTransR
auto trans_name = var->GetName() + "_trans_" + std::to_string(index++);
auto ret = RecoverOneTransNodeForVarRef(trans_name, *iter, last_node, last_node);
if (ret != SUCCESS) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E15001", {"variable", "index", "type"}, {var->GetName(), std::to_string(index), iter->node_type});
GELOGE(INTERNAL_ERROR, "Failed to recover trans node for variable %s, index %d, type %s",
var->GetName().c_str(), index, iter->node_type.c_str());
return INTERNAL_ERROR;
@@ -571,6 +577,8 @@ Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, Node
std::string related_node_name;
if (AttrUtils::GetStr(data_node->GetOpDesc(), kMbatchSwitchnName, related_node_name)) {
if (related_node_name.empty()) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E15002", {"opname", "value", "reason"}, {data_node->GetName(), "flag", "but the value is empty"});
GELOGE(INTERNAL_ERROR, "The data node %s has switchn node flag, but the value is empty",
data_node->GetName().c_str());
return INTERNAL_ERROR;
@@ -582,6 +590,9 @@ Status CheckIfDynamicBatchScene(NodePtr &data_node, bool &is_dynamic_batch, Node
}
}
if (switchn_node == nullptr) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E15002", {"opname", "value", "reason"},
{data_node->GetName(), related_node_name, "but can not find it on the graph"});
GELOGE(INTERNAL_ERROR, "The data node %s has switchn node %s, but can not find it on the graph",
data_node->GetName().c_str(), related_node_name.c_str());
return INTERNAL_ERROR;
@@ -682,6 +693,10 @@ Status ProcessInputNC1HWC0DynShape(NodePtr &node_ptr, bool &is_dynamic_batch, No
ge::GeShape old_shape = input->GetShape();
bool support = ((old_format == FORMAT_NC1HWC0) || (old_format == FORMAT_NCHW) || (old_format == FORMAT_NHWC));
if (!support) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E19014", {"opname", "value", "reason"},
{op_desc->GetName(), "format[" + TypeUtils::FormatToSerialString(old_format) + "]",
"only support FORMAT_NC1HWC0,FORMAT_NCHW,FORMAT_NHWC"});
GELOGE(INTERNAL_ERROR, "The format [%s] is unsupported", TypeUtils::FormatToSerialString(old_format).c_str());
return FAILED;
}
@@ -762,6 +777,9 @@ Status GetStorageFormatAndShape(OpDescPtr &op_desc, const GeTensorDescPtr &tenso
op_desc->GetName().c_str(), TypeUtils::FormatToSerialString(storage_format).c_str(),
formats::JoinToString(storage_shape).c_str());
} else {
ErrorManager::GetInstance().ATCReportErrMessage(
"15003", {"opname", "format"},
{op_desc->GetName(), TypeUtils::FormatToSerialString(storage_format)});
GELOGE(PARAM_INVALID, "Update node by storage format failed, storage_shape not set. "
"node: [%s], storage_format [%s]",
op_desc->GetName().c_str(), TypeUtils::FormatToSerialString(storage_format).c_str());
@@ -900,9 +918,14 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) {
// check if is_output_adjust_hw_layout is set
if (NeedUpdateFormatByOutputTypeParm(op_desc, index)) {
if ((old_format != FORMAT_NCHW) && (old_format != FORMAT_NHWC) && (old_format != FORMAT_NC1HWC0)) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E19014", {"opname", "value", "reason"},
{op_desc->GetName(), "format[" + TypeUtils::FormatToSerialString(old_format) + "]",
"only support FORMAT_NC1HWC0,FORMAT_NCHW,FORMAT_NHWC"});
GELOGE(INTERNAL_ERROR, "Format is not one of NCHW, NHWC, NC1HWC0.");
return FAILED;
}

GeTensorDesc old_desc(old_shape, old_format, old_dtype);
if (ProcessNetoutputNodeFp16Nc1hwc0DynShape(old_desc, net_output_input_desc, src_node) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Process netoutput fp16 nc1hwc0.");
@@ -1035,6 +1058,9 @@ Status GraphPrepare::CheckRefInputNode(const NodePtr &node, const std::string &i
}
bool is_acceptable = (acceptable_types.find(input_type) != acceptable_types.end());
if (!is_acceptable) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E15005", {"opname", "optype", "opname1", "optype1"},
{op_desc->GetName(), node->GetType(), input_op_desc->GetName(), input_op_desc->GetType()});
GELOGE(PARAM_INVALID, "The ref input of ref node %s[%s] must be ref node or variable, but %s[%s]isn't.",
node->GetName().c_str(), node->GetType().c_str(), input_op_desc->GetName().c_str(),
input_op_desc->GetType().c_str());
@@ -1127,6 +1153,9 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) {
}

if ((index < 0) || (static_cast<size_t>(index) >= user_input.size())) {
std::string situation = "data op index[" + std::to_string(index) + "]";
std::string reason = "it must be less than user_input size[" + std::to_string(user_input.size()) + "]";
ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason});
GELOGE(PARAM_INVALID, "user_input size = %zu, graph data op index = %ld.", user_input.size(), index);
return FAILED;
}
@@ -1139,6 +1168,9 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) {
if (need_check_internal_format) {
bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format);
if (is_internal) {
ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"},
{"Input format[" + TypeUtils::FormatToSerialString(format) + "] or origin_format[" +
TypeUtils::FormatToSerialString(origin_format) + "]", "it is not supported"});
GELOGE(PARAM_INVALID, "Input format %s or origin_format %s is not supported.",
TypeUtils::FormatToSerialString(format).c_str(),
TypeUtils::FormatToSerialString(origin_format).c_str());
@@ -1150,6 +1182,8 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) {
uint32_t length = 1;
bool type_ret = TypeUtils::GetDataTypeLength(data_type, length);
if (!type_ret) {
ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"},
{"Input datatype[" + TypeUtils::DataTypeToSerialString(data_type) + "]", "it is not supported"});
GELOGE(PARAM_INVALID, "Input datatype %s is not supported.",
TypeUtils::DataTypeToSerialString(data_type).c_str());
return FAILED;
@@ -1164,6 +1198,10 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) {
return FAILED);
bool size_check = (size != 0 && shape_size != size);
if (size_check) {
std::string situation = "input data size[" + std::to_string(size) +
"] and shape_size[" + std::to_string(shape_size) + "]";
std::string reason = "because size != 0 and shape_size != size";
ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason});
GELOGE(PARAM_INVALID, "input data size =%ld, shape_size =%ld.", size, shape_size);
return FAILED;
}
@@ -1503,6 +1541,8 @@ Status GraphPrepare::VerifyConstOp(const NodePtr &node) {
uint32_t length = 1;
bool type_ret = TypeUtils::GetDataTypeLength(data_type, length);
if (!type_ret) {
ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"},
{"Input datatype[" + TypeUtils::DataTypeToSerialString(data_type) + "]", "it is not supported"});
GELOGE(PARAM_INVALID, "Input datatype %s is not supported.", TypeUtils::DataTypeToSerialString(data_type).c_str());
return FAILED;
}
@@ -1512,14 +1552,20 @@ Status GraphPrepare::VerifyConstOp(const NodePtr &node) {
if (shape_size == 0) {
if (ge_tensor_desc.GetShape().GetDims().size() == 0) {
// shape = [], means it's a scalar tensor.
GE_CHK_BOOL_EXEC(data_size / length == 1, return PARAM_INVALID, "Const is invalid scalar tensor.");
GE_CHK_BOOL_EXEC(data_size / length == 1,
ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {"Const is invalid scalar tensor."});
return PARAM_INVALID, "Const is invalid scalar tensor.");
} else {
// shape = [x, y, 0, ...], means it's a vector tensor whose value is [].
GE_CHK_BOOL_EXEC(data_size == 0, return PARAM_INVALID, "Const is invalid vector scalar.");
GE_CHK_BOOL_EXEC(data_size == 0,
ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {"Const is invalid vector scalar."});
return PARAM_INVALID, "Const is invalid vector scalar.");
}
} else {
GE_CHK_BOOL_EXEC(data_size == static_cast<size_t>(shape_size * length) && data_size != 0, return PARAM_INVALID,
"Const input data size is not equal with tensor desc shape");
GE_CHK_BOOL_EXEC(data_size == static_cast<size_t>(shape_size * length) && data_size != 0,
ErrorManager::GetInstance().ATCReportErrMessage(
"E10043", {"reason"}, {"Const input data size is not equal with tensor desc shape"});
return PARAM_INVALID, "Const input data size is not equal with tensor desc shape");
}
return SUCCESS;
}
@@ -1543,6 +1589,9 @@ Status GraphPrepare::CheckUserInput(const std::vector<GeTensor> &user_input) {
return GE_GRAPH_INIT_FAILED;
}
if ((index < 0) || (static_cast<size_t>(index) >= user_input.size())) {
std::string situation = "data op index[" + std::to_string(index) + "]";
std::string reason = "it must be less than user_input size[" + std::to_string(user_input.size()) + "]";
ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason});
GELOGE(GE_GRAPH_INIT_FAILED, "user_input size:%zu, data op index:%ld.", user_input.size(), index);
return GE_GRAPH_INIT_FAILED;
}
@@ -1550,6 +1599,9 @@ Status GraphPrepare::CheckUserInput(const std::vector<GeTensor> &user_input) {

for (size_t i = 0; i < desc.GetShape().GetDimNum(); ++i) {
if (desc.GetShape().GetDim(i) < 0) {
std::string situation = "data dim[" + std::to_string(i) + "][" + std::to_string(desc.GetShape().GetDim(i)) + "]";
std::string reason = "it must be >= 0";
ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {situation, reason});
GELOGE(GE_GRAPH_INIT_FAILED, "data dim %zu is not supported, need >= 0, real:%ld.", i,
desc.GetShape().GetDim(i));
return GE_GRAPH_INIT_FAILED;
@@ -1627,7 +1679,6 @@ Status GraphPrepare::PrepareOptimize() {
try {
(void)original_graph_passes.AddPass("PrepareOptimize::ShapeOperateOpRemovePass", new ShapeOperateOpRemovePass);
(void)original_graph_passes.AddPass("PrepareOptimize::ReplaceTransShapePass", new ReplaceTransShapePass);
(void)original_graph_passes.AddPass("PrepareOptimize::MarkAgnosticPass", new MarkAgnosticPass);
} catch (std::bad_alloc &e) {
GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs.");
return INTERNAL_ERROR;


ge/graph/preprocess/insert_op/ge_aipp_op.cc (+88 -97)

@@ -53,16 +53,6 @@
} \
} while (0)

#define AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(expr, _status, errormsg) \
do { \
bool b = (expr); \
if (!b) { \
GELOGE(_status, errormsg); \
ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \
return _status; \
} \
} while (0)

namespace {
const int32_t DEFAULT_MATRIX_R0C0_YUV2RGB = 298;
const int32_t DEFAULT_MATRIX_R0C1_YUV2RGB = 0;
@@ -316,9 +306,8 @@ NodePtr AippOp::FindDataByIndex(const ComputeGraphPtr &graph, int rank) {
}
return node;
}
GELOGE(PARAM_INVALID, "Can not find the data node by index %d", rank);
string errormsg = "Can not find the data node by aipp parameter related_input_rank " + to_string(rank);
ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg});
string error_msg = "Can not find the data node by aipp parameter related_input_rank " + to_string(rank);
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
return nullptr;
}
Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr &target,
@@ -363,10 +352,10 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr
}

if (!edge_indexes.empty() && (*edge_indexes.rbegin() >= data_node->GetOutDataNodes().size())) {
GELOGE(PARAM_INVALID, "input_edge_idx %u should smaller than out edge size of target input %zu",
*edge_indexes.rbegin(), data_node->GetOutDataNodes().size());
string errormsg = "The aipp parameter input_edge_idx should be smaller than the target input's outnodes.";
ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg});
string error_msg = "The aipp parameter input_edge_idx[" + std::to_string(*edge_indexes.rbegin()) +
"] should be smaller than the target input[" +
std::to_string(data_node->GetOutDataNodes().size()) + "]'s outnodes.";
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
return PARAM_INVALID;
}
target = data_node;
@@ -439,8 +428,7 @@ Status AippOp::ConvertRelatedInputNameToRank() {
if (!convert_flag) {
string error_msg = "Top name " + related_input_name + " convert rank failed. Please"
" ensure top name in aipp config is the top name of data node.";
ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg});
GELOGE(PARAM_INVALID, "Top name[%s] converts rank failed.", related_input_name.c_str());
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
return PARAM_INVALID;
}

@@ -537,87 +525,87 @@ Status AippOp::SetDefaultParams() {

Status AippOp::ValidateParams() {
GE_CHECK_NOTNULL(aipp_params_);
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->aipp_mode() != domi::AippOpParams::undefined, PARAM_INVALID,
"When insert AIPP op, aipp_mode must be configured as static or dynamic ");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_0_size() <= 1, PARAM_INVALID,
"The parameter var_reci_chn_0 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_1_size() <= 1, PARAM_INVALID,
"The parameter var_reci_chn_1 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_2_size() <= 1, PARAM_INVALID,
"The parameter var_reci_chn_2 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->var_reci_chn_3_size() <= 1, PARAM_INVALID,
"The parameter var_reci_chn_3 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r0c0_size() <= 1, PARAM_INVALID,
"The parameter matrix_r0c0 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r0c1_size() <= 1, PARAM_INVALID,
"The parameter matrix_r0c1 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r0c2_size() <= 1, PARAM_INVALID,
"The parameter matrix_r0c2 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r1c0_size() <= 1, PARAM_INVALID,
"The parameter matrix_r1c0 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r1c1_size() <= 1, PARAM_INVALID,
"The parameter matrix_r1c1 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r1c2_size() <= 1, PARAM_INVALID,
"The parameter matrix_r1c2 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r2c0_size() <= 1, PARAM_INVALID,
"The parameter matrix_r2c0 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r2c1_size() <= 1, PARAM_INVALID,
"The parameter matrix_r2c1 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->matrix_r2c2_size() <= 1, PARAM_INVALID,
"The parameter matrix_r2c2 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->output_bias_0_size() <= 1, PARAM_INVALID,
"The parameter output_bias_0 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->output_bias_1_size() <= 1, PARAM_INVALID,
"The parameter output_bias_1 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->output_bias_2_size() <= 1, PARAM_INVALID,
"The parameter output_bias_2 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_bias_0_size() <= 1, PARAM_INVALID,
"The parameter input_bias_0 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_bias_1_size() <= 1, PARAM_INVALID,
"The parameter input_bias_1 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_bias_2_size() <= 1, PARAM_INVALID,
"The parameter input_bias_2 can not be configed repeatedly");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_edge_idx_size() <= 1, PARAM_INVALID,
"The parameter input_edge_idx can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->aipp_mode() != domi::AippOpParams::undefined, PARAM_INVALID,
"When insert AIPP op, aipp_mode must be configured as static or dynamic ");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->var_reci_chn_0_size() <= 1, PARAM_INVALID,
"The parameter var_reci_chn_0 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->var_reci_chn_1_size() <= 1, PARAM_INVALID,
"The parameter var_reci_chn_1 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->var_reci_chn_2_size() <= 1, PARAM_INVALID,
"The parameter var_reci_chn_2 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->var_reci_chn_3_size() <= 1, PARAM_INVALID,
"The parameter var_reci_chn_3 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r0c0_size() <= 1, PARAM_INVALID,
"The parameter matrix_r0c0 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r0c1_size() <= 1, PARAM_INVALID,
"The parameter matrix_r0c1 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r0c2_size() <= 1, PARAM_INVALID,
"The parameter matrix_r0c2 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r1c0_size() <= 1, PARAM_INVALID,
"The parameter matrix_r1c0 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r1c1_size() <= 1, PARAM_INVALID,
"The parameter matrix_r1c1 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r1c2_size() <= 1, PARAM_INVALID,
"The parameter matrix_r1c2 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r2c0_size() <= 1, PARAM_INVALID,
"The parameter matrix_r2c0 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r2c1_size() <= 1, PARAM_INVALID,
"The parameter matrix_r2c1 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->matrix_r2c2_size() <= 1, PARAM_INVALID,
"The parameter matrix_r2c2 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->output_bias_0_size() <= 1, PARAM_INVALID,
"The parameter output_bias_0 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->output_bias_1_size() <= 1, PARAM_INVALID,
"The parameter output_bias_1 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->output_bias_2_size() <= 1, PARAM_INVALID,
"The parameter output_bias_2 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_bias_0_size() <= 1, PARAM_INVALID,
"The parameter input_bias_0 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_bias_1_size() <= 1, PARAM_INVALID,
"The parameter input_bias_1 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_bias_2_size() <= 1, PARAM_INVALID,
"The parameter input_bias_2 can not be configed repeatedly");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_edge_idx_size() <= 1, PARAM_INVALID,
"The parameter input_edge_idx can not be configed repeatedly");

const domi::AippOpParams::AippMode aipp_mode = aipp_params_->aipp_mode();
if (aipp_mode == domi::AippOpParams::dynamic) {
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(
GE_CHK_LOG_AND_ERRORMSG(
aipp_params_->max_src_image_size() > 0, PARAM_INVALID,
"For dynamic AIPP params, max_src_image_size must be set which number should be greater than 0");
} else {
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_format() != domi::AippOpParams::UNDEFINED, PARAM_INVALID,
"Input format of AIPP conf is undefined");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->src_image_size_w() >= 0, PARAM_INVALID,
"Src_image_size_w must not be configed smaller than 0");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->src_image_size_h() >= 0, PARAM_INVALID,
"Src_image_size_h must not be configed smaller than 0");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->load_start_pos_w() >= 0, PARAM_INVALID,
"Load_start_pos_w must not be configed smaller than 0");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->load_start_pos_h() >= 0, PARAM_INVALID,
"Load_start_pos_h must not be configed smaller than 0");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->crop_size_w() >= 0, PARAM_INVALID,
"Crop_size_w must not be configed smaller than 0");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->resize_output_w() >= 0, PARAM_INVALID,
"Resize_output_w must not be configed smaller than 0");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->resize_output_h() >= 0, PARAM_INVALID,
"Resize_output_h must not be configed smaller than 0");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->left_padding_size() >= 0, PARAM_INVALID,
"Left_padding_size must not be configed smaller than 0");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->right_padding_size() >= 0, PARAM_INVALID,
"Right_padding_size must not be configed smaller than 0");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->top_padding_size() >= 0, PARAM_INVALID,
"Top_padding_size must not be configed smaller than 0");
AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->bottom_padding_size() >= 0, PARAM_INVALID,
"Bottom_padding_size must not be configed smaller than 0");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->input_format() != domi::AippOpParams::UNDEFINED, PARAM_INVALID,
"Input format of AIPP conf is undefined");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->src_image_size_w() >= 0, PARAM_INVALID,
"Src_image_size_w must not be configed smaller than 0");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->src_image_size_h() >= 0, PARAM_INVALID,
"Src_image_size_h must not be configed smaller than 0");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->load_start_pos_w() >= 0, PARAM_INVALID,
"Load_start_pos_w must not be configed smaller than 0");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->load_start_pos_h() >= 0, PARAM_INVALID,
"Load_start_pos_h must not be configed smaller than 0");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->crop_size_w() >= 0, PARAM_INVALID,
"Crop_size_w must not be configed smaller than 0");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->resize_output_w() >= 0, PARAM_INVALID,
"Resize_output_w must not be configed smaller than 0");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->resize_output_h() >= 0, PARAM_INVALID,
"Resize_output_h must not be configed smaller than 0");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->left_padding_size() >= 0, PARAM_INVALID,
"Left_padding_size must not be configed smaller than 0");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->right_padding_size() >= 0, PARAM_INVALID,
"Right_padding_size must not be configed smaller than 0");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->top_padding_size() >= 0, PARAM_INVALID,
"Top_padding_size must not be configed smaller than 0");
GE_CHK_LOG_AND_ERRORMSG(aipp_params_->bottom_padding_size() >= 0, PARAM_INVALID,
"Bottom_padding_size must not be configed smaller than 0");
}

return SUCCESS;
@@ -790,17 +778,20 @@ Status AippOp::CreateAippData(const NodePtr &aipp_node) {

int64_t batch_count = -1;
if (GetDataDimN(data_node, ori_data_format, batch_count) != ge::SUCCESS) {
GELOGE(PARAM_INVALID, "Get data_node dims and transfer to nchw_dims failed!");
string error_msg = "Get data_node dims and transfer to nchw_dims failed!";
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
return PARAM_INVALID;
}
if (batch_count <= 0) {
GELOGE(PARAM_INVALID, "Batch count %ld is invalid", batch_count);
string error_msg = "Batch count[" + std::to_string(batch_count) + "] is invalid, it must positive.";
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
return PARAM_INVALID;
}

int64_t max_dynamic_aipp_size = CalcMaxSize(batch_count);
if (max_dynamic_aipp_size < 0) {
GELOGE(PARAM_INVALID, "The dynamic aipp size is not positive.");
string error_msg = "The dynamic aipp size is not positive";
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
return PARAM_INVALID;
}



+24 -30  ge/graph/preprocess/insert_op/util_insert_aipp_op.cc

@@ -124,19 +124,13 @@ Status InsertNewOpUtil::CheckInputNamePositionNotRepeat() {
if (another_item->related_input_name().empty()) {
string error_msg = "Can not both set related_input_name and related_input_rank!"
" Please ensure param is the same with the first aipp config(related_input_name).";
ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg});
GELOGE(PARAM_INVALID,
"Can not both set related_input_rank and related_input_name!"
" Please ensure param is the same with the first aipp config(related_input_name).");
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
return PARAM_INVALID;
}
if (item->related_input_name() == another_item->related_input_name()) {
string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_name"
" param is different in different aipp config.";
ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg});
GELOGE(PARAM_INVALID,
"Can not insert aipp op to the same postion! Please ensure related_input_rank param "
"is different in different aipp config.");
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
return PARAM_INVALID;
}
}
@@ -156,19 +150,13 @@ Status InsertNewOpUtil::CheckInputRankPositionNoRepeat() {
if (!another_item->related_input_name().empty()) {
string error_msg = "Can not both set related_input_rank and related_input_name!"
" Please ensure param is the same with the first aipp config(related_input_rank).";
ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg});
GELOGE(PARAM_INVALID,
"Can not both set related_input_rank and related_input_name!"
" Please ensure param is the same with the first aipp config(related_input_rank).");
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
return PARAM_INVALID;
}
if (item->related_input_rank() == another_item->related_input_rank()) {
string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_rank"
" param is different in different aipp config.";
ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg});
GELOGE(PARAM_INVALID,
"Can not insert aipp op to the same postion! Please ensure related_input_rank param "
"is different in different aipp config.");
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
return PARAM_INVALID;
}
}
@@ -224,9 +212,10 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) {
}
}
}
GE_CHK_BOOL_RET_STATUS((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt), PARAM_INVALID,
"Can not config part of outputs of Data node to support AIPP, config all "
"of the outputs of Data to support AIPP, or config none of them");
GE_CHK_LOG_AND_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt),
PARAM_INVALID,
"Can not config part of outputs of Data node to support AIPP, config all "
"of the outputs of Data to support AIPP, or config none of them");

std::unique_ptr<domi::AippOpParams> aippParams(new (std::nothrow) domi::AippOpParams());
GE_CHECK_NOTNULL(aippParams);
@@ -238,16 +227,19 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) {
GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i]));

if (aippMode == domi::AippOpParams::static_) {
GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID,
"The input_format of all aipp_ops after one Data should be the same");
GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_w() == currAippParam->src_image_size_w(), PARAM_INVALID,
"The src_image_size_w of all aipp_ops after one Data should be the same");
GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_h() == currAippParam->src_image_size_h(), PARAM_INVALID,
"The src_image_size_h of all aipp_ops after one Data should be the same");
GE_CHK_LOG_AND_ERRORMSG(
aippParams->input_format() == currAippParam->input_format(),
PARAM_INVALID, "The input_format of all aipp_ops after one Data should be the same");
GE_CHK_LOG_AND_ERRORMSG(
aippParams->src_image_size_w() == currAippParam->src_image_size_w(),
PARAM_INVALID, "The src_image_size_w of all aipp_ops after one Data should be the same");
GE_CHK_LOG_AND_ERRORMSG(
aippParams->src_image_size_h() == currAippParam->src_image_size_h(),
PARAM_INVALID, "The src_image_size_h of all aipp_ops after one Data should be the same");
} else {
GE_CHK_BOOL_RET_STATUS(aippParams->max_src_image_size() == currAippParam->max_src_image_size(),
PARAM_INVALID,
"The max_src_image_size of all aipp_ops after one Data should be the same");
GE_CHK_LOG_AND_ERRORMSG(
aippParams->max_src_image_size() == currAippParam->max_src_image_size(),
PARAM_INVALID, "The max_src_image_size of all aipp_ops after one Data should be the same");
}
});
}
@@ -290,7 +282,8 @@ Status InsertNewOpUtil::UpdateDataNodeByAipp(const ComputeGraphPtr &graph) {
for (auto &switchn : updated_switchn) {
auto data_iter = switchn_names_to_data.find(switchn->GetName());
if (data_iter == switchn_names_to_data.end()) {
GELOGE(INTERNAL_ERROR, "Failed to find relative data node by switchn %s", switchn->GetName().c_str());
string error_msg = "Failed to find relative data node by switchn[" + switchn->GetName() + "]";
GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, error_msg.c_str());
return INTERNAL_ERROR;
}
GE_RETURN_IF_ERROR(UpdateDataBySwitchN(switchn, data_iter->second));
@@ -477,7 +470,8 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt
}
}
if (max_index >= switchn->GetOpDesc()->GetOutputsSize()) {
GELOGE(INTERNAL_ERROR, "No max size found from switchn node %s", switchn->GetName().c_str());
string error_msg = "No max size found from switchn node[" + switchn->GetName()+ "]";
GE_ERRORLOG_AND_ERRORMSG(INTERNAL_ERROR, error_msg.c_str());
return INTERNAL_ERROR;
}
auto output_desc = switchn->GetOpDesc()->MutableOutputDesc(max_index);


+9 -0  ge/graph/preprocess/multi_batch_copy_graph.cc

@@ -595,6 +595,8 @@ Status MultiBatchGraphCopyer::CheckCopyResult(const std::vector<NodePtr> &start_
}
auto dims = NodeUtils::GetOutputDesc(*node, kDataOutIndex).GetShape().GetDims();
if (!IsAllDimsPositive(dims)) {
ErrorManager::GetInstance().ATCReportErrMessage("E15004", {"opname", "shape"},
{node->GetName(), formats::ShapeToString(dims)});
GELOGE(INTERNAL_ERROR, "Failed to copy multi batch graph, the node %s still has unknown shape %s",
node->GetName().c_str(), formats::ShapeToString(dims).c_str());
return INTERNAL_ERROR;
@@ -1025,6 +1027,13 @@ Status MultiBatchGraphCopyer::InsertIdentityAfterSwitchN() {
}

Status ProcessMultiBatch(ComputeGraphPtr &graph) {
const char *multi_batch_with_case = std::getenv("MULTI_BATCH_WITH_CASE");
if (multi_batch_with_case != nullptr) {
PassManager pass_manager;
GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass));
return pass_manager.Run(graph);
}

std::vector<std::vector<int64_t>> shapes;
if (!InitDynamicParams(shapes)) {
GELOGD("There is no multi-batch options, no need to process multi-batch copy");


+4 -0  ge/graph/preprocess/multi_batch_options.cc

@@ -124,6 +124,8 @@ Status ParserDataToDynmaicInfo(const vector<vector<int64_t>> &shapes,
auto tmp_index = cur_data_index;
for (size_t i = 0; i < static_cast<size_t>(dynamic_dims_num); ++i) {
if (tmp_index >= dynamic_gear_info.size()) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E10045", {"name", "shape"}, {data_name, formats::JoinToString(data_shape)});
GELOGE(PARAM_INVALID, "Data: %s shape: %s make dynamic dims overflow", data_name.c_str(),
formats::JoinToString(data_shape).c_str());
return FAILED;
@@ -131,6 +133,8 @@ Status ParserDataToDynmaicInfo(const vector<vector<int64_t>> &shapes,
one_gear.push_back(dynamic_gear_info[tmp_index++]);
}
} else {
ErrorManager::GetInstance().ATCReportErrMessage(
"E10046", {"name", "shape"}, {data_name, formats::JoinToString(data_shape)});
GELOGE(PARAM_INVALID, "Dynamic dims num of data: %s shape: %s can not be more than one gear dynamic info size",
data_name.c_str(), formats::JoinToString(data_shape).c_str());
return FAILED;


+3 -1  ge/host_kernels/slice_kernel.cc

@@ -100,7 +100,9 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vector<ConstGeTenso
}
// construct tensorDesc
ge::GeShape output_shape(output_dims);
GeTensorDesc output_tensor_desc(output_shape, FORMAT_NCHW, data_type);
auto attr_output_tensor_desc = attr->GetOutputDesc(0);
GeTensorDesc output_tensor_desc(attr_output_tensor_desc);
output_tensor_desc.SetShape(output_shape);
GeTensorPtr output_ptr = MakeShared<GeTensor>(output_tensor_desc);
if (output_ptr == nullptr) {
GELOGW("make_shared ge::GeTensor failed, node name %s.", attr->GetName().c_str());


+3 -1  ge/hybrid/executor/worker/execution_engine.cc

@@ -259,7 +259,9 @@ Status NodeDoneCallback::ProfilingReport() {
return profiling_ret;
}

ProfilingManager::Instance().ReportProfilingData(task_desc_info, compute_graph_info);
auto &profiling_manager = ProfilingManager::Instance();
profiling_manager.ReportProfilingData(model->GetModelId(), task_desc_info, compute_graph_info,
!profiling_manager.IsAclApiMode());
return SUCCESS;
}



+27 -15  ge/hybrid/node_executor/aicore/aicore_node_executor.cc

@@ -17,8 +17,6 @@
#include "aicore_node_executor.h"
#include "cce/taskdown_common.hpp"
#include "hybrid/executor/hybrid_execution_context.h"
#include "init/gelib.h"
#include "hybrid/executor/hybrid_execution_context.h"

namespace ge {
namespace hybrid {
@@ -28,19 +26,10 @@ AiCoreNodeTask::AiCoreNodeTask(std::vector<std::unique_ptr<AiCoreOpTask>> &&task
}

Status AiCoreNodeExecutor::Initialize() {
auto ge_lib = GELib::GetInstance();
GE_CHECK_NOTNULL(ge_lib);
if (!ge_lib->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge_lib is uninitialized, failed.");
return GE_CLI_GE_NOT_INITIALIZED;
compiler_ = TaskCompilerFactory::GetInstance().GetTaskCompiler();
if (compiler_ != nullptr) {
GE_CHK_STATUS_RET(compiler_->Initialize(), "Failed to init aicore task compiler.");
}

auto &kernel_manager = ge_lib->OpsKernelManagerObj();
auto aic_ops_store = kernel_manager.GetOpsKernelInfoStore("AIcoreEngine");
GE_CHECK_NOTNULL(aic_ops_store);

compiler_.reset(new(std::nothrow)AiCoreTaskCompiler(aic_ops_store));
GE_CHECK_NOTNULL(compiler_);
return SUCCESS;
}

@@ -120,6 +109,12 @@ Status AiCoreNodeExecutor::CompileTask(const HybridModel &model,
GE_CHECK_NOTNULL(op_desc);
GELOGI("AiCoreNodeExecutor(%s) CompileTask Start.", node->GetName().c_str());

auto ori_node_name = node->GetName();
if (compiler_ == nullptr) {
GELOGE(FAILED, "[%s] Can not find any valid aicore task compiler.", ori_node_name.c_str());
return FAILED;
}

AiCoreNodeTaskRegistry &registry = AiCoreNodeTaskRegistry::GetInstance();
std::string shape_key;
GE_CHK_STATUS_RET(GenNodeKey(node, shape_key), "GenNodeKey failed, op name = %s.", node->GetName().c_str());
@@ -133,7 +128,6 @@ Status AiCoreNodeExecutor::CompileTask(const HybridModel &model,
}

std::vector<domi::TaskDef> task_defs;
auto ori_node_name = node->GetName();
op_desc->SetName(ori_node_name + "_" + shape_key);
GE_CHK_STATUS_RET(compiler_->CompileOp(node, task_defs), "Compile op(%s) failed.", ori_node_name.c_str());
op_desc->SetName(ori_node_name);
@@ -239,5 +233,23 @@ bool AiCoreNodeTask::IsNoOp(TaskContext &task_context) {

return true;
}

TaskCompilerFactory &TaskCompilerFactory::GetInstance() {
static TaskCompilerFactory instance;
return instance;
}

void TaskCompilerFactory::Register(CreateFn fn) {
compiler_func_ = fn;
}

std::unique_ptr<TaskCompiler> TaskCompilerFactory::GetTaskCompiler() {
auto compiler_instance = std::unique_ptr<TaskCompiler>(compiler_func_());
return compiler_instance;
}

CompilerFunctionRegistrar::CompilerFunctionRegistrar(CreateFn fn) {
TaskCompilerFactory::GetInstance().Register(fn);
}
} // namespace hybrid
} // namespace ge

+36 -3  ge/hybrid/node_executor/aicore/aicore_node_executor.h

@@ -18,13 +18,21 @@
#define GE_HYBRID_KERNEL_AICORE_NODE_EXECUTOR_H_

#include "hybrid/node_executor/aicore/aicore_task_builder.h"
#include "hybrid/node_executor/aicore/aicore_task_compiler.h"
#include "hybrid/node_executor/node_executor.h"
#include <map>
#include <mutex>

namespace ge {
namespace hybrid {

class TaskCompiler {
public:
TaskCompiler() = default;
virtual ~TaskCompiler() = default;
virtual Status CompileOp(const NodePtr &node, std::vector<domi::TaskDef> &tasks) = 0;
virtual Status Initialize() = 0;
};

class AiCoreNodeTaskRegistry {
public:
~AiCoreNodeTaskRegistry() = default;
@@ -65,8 +73,33 @@ class AiCoreNodeExecutor : public NodeExecutor {

private:
static Status GenNodeKey(const NodePtr &node, std::string &node_key);
std::unique_ptr<AiCoreTaskCompiler> compiler_;
std::unique_ptr<TaskCompiler> compiler_;
};

using CreateFn = TaskCompiler *(*)();
class TaskCompilerFactory {
public:
static TaskCompilerFactory &GetInstance();
void Register(CreateFn fn);
std::unique_ptr<TaskCompiler> GetTaskCompiler();

private:
CreateFn compiler_func_;
};

class CompilerFunctionRegistrar {
public:
CompilerFunctionRegistrar(CreateFn fn);
~CompilerFunctionRegistrar() = default;
};
} // namespace hybrid
} // namespace ge
#endif //GE_HYBRID_KERNEL_AICORE_NODE_EXECUTOR_H_

#define REGISTER_TASK_COMPILER(compiler) \
static ::ge::hybrid::CompilerFunctionRegistrar register_compiler_function \
__attribute__((unused)) = \
::ge::hybrid::CompilerFunctionRegistrar([]()->::ge::hybrid::TaskCompiler* { \
return new (std::nothrow) compiler(); \
}) \

#endif //GE_HYBRID_KERNEL_AICORE_NODE_EXECUTOR_H_
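
The registration macro above pairs with the TaskCompilerFactory implemented in the .cc file. A minimal sketch of how a backend plugs in; MyTaskCompiler is a hypothetical name, and only the TaskCompiler interface and the macro come from this diff:

    namespace ge {
    namespace hybrid {
    class MyTaskCompiler : public TaskCompiler {  // hypothetical example backend
     public:
      Status Initialize() override { return SUCCESS; }
      Status CompileOp(const NodePtr &node, std::vector<domi::TaskDef> &tasks) override {
        (void)node; (void)tasks;  // emit TaskDefs for the node here
        return SUCCESS;
      }
    };
    }  // namespace hybrid
    }  // namespace ge

    REGISTER_TASK_COMPILER(ge::hybrid::MyTaskCompiler);

Note that TaskCompilerFactory stores a single CreateFn, so the last Register call wins; in this commit AiCoreTaskCompiler registers itself in aicore_task_compiler.cc.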

+14 -2  ge/hybrid/node_executor/aicore/aicore_task_compiler.cc

@@ -18,6 +18,7 @@
#include "framework/common/debug/log.h"
#include "graph/debug/ge_attr_define.h"
#include "opskernel_manager/ops_kernel_builder_manager.h"
#include "init/gelib.h"

namespace ge {
namespace hybrid {
@@ -25,11 +26,22 @@ namespace {
uintptr_t kWeightBase = 0x10000000;
uintptr_t kMemBase = 0x20000000;
uint64_t kFakeSize = 0x10000000UL;
REGISTER_TASK_COMPILER(AiCoreTaskCompiler);
}
std::mutex AiCoreTaskCompiler::mu_;

AiCoreTaskCompiler::AiCoreTaskCompiler(OpsKernelInfoStorePtr aic_kernel_store)
: aic_kernel_store_(std::move(aic_kernel_store)) {}
Status AiCoreTaskCompiler::Initialize() {
auto ge_lib = GELib::GetInstance();
GE_CHECK_NOTNULL(ge_lib);
if (!ge_lib->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge_lib is uninitialized, failed.");
return GE_CLI_GE_NOT_INITIALIZED;
}
auto &kernel_manager = ge_lib->OpsKernelManagerObj();
aic_kernel_store_ = kernel_manager.GetOpsKernelInfoStore("AIcoreEngine");
GE_CHECK_NOTNULL(aic_kernel_store_);
return SUCCESS;
}

Status AiCoreTaskCompiler::DoCompileOp(const NodePtr &node) const {
GE_CHECK_NOTNULL(node);


+5 -3  ge/hybrid/node_executor/aicore/aicore_task_compiler.h

@@ -19,15 +19,17 @@

#include <mutex>
#include "opskernel_manager/ops_kernel_manager.h"
#include "aicore_node_executor.h"

namespace ge {
namespace hybrid {
class AiCoreTaskCompiler {
class AiCoreTaskCompiler : public TaskCompiler {
public:
explicit AiCoreTaskCompiler(OpsKernelInfoStorePtr aic_kernel_store);
AiCoreTaskCompiler() = default;
~AiCoreTaskCompiler() = default;

Status CompileOp(const NodePtr &node, std::vector<domi::TaskDef> &tasks);
Status CompileOp(const NodePtr &node, std::vector<domi::TaskDef> &tasks) override;
Status Initialize() override;
private:
Status DoCompileOp(const NodePtr &node) const;
Status DoGenerateTask(const Node &node, std::vector<domi::TaskDef> &tasks);


+6 -0  ge/init/gelib.cc

@@ -56,6 +56,7 @@ const int kDefaultDeviceIdForInfer = -1;
const uint32_t kAicoreOverflow = (0x1 << 0);
const uint32_t kAtomicOverflow = (0x1 << 1);
const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow);
const char *const kGlobalOptionFpCeilingModeDefault = "2";
} // namespace
static std::shared_ptr<GELib> instancePtr_ = nullptr;

@@ -79,6 +80,11 @@ Status GELib::Initialize(const map<string, string> &options) {
return ret;
}
instancePtr_->SetDefaultPrecisionMode(new_options);

if (new_options.find("ge.fpCeilingMode") == new_options.end()) {
new_options["ge.fpCeilingMode"] = kGlobalOptionFpCeilingModeDefault;
}

GetMutableGlobalOptions().insert(new_options.begin(), new_options.end());
GetThreadLocalContext().SetGlobalOption(GetMutableGlobalOptions());
GE_TIMESTAMP_START(Init);
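
The effect of the new default is easiest to see from the caller's side; a sketch, assuming a bare options map (the values are illustrative):

    std::map<std::string, std::string> options;
    // options["ge.fpCeilingMode"] = "1";  // uncomment to override the default
    ge::Status ret = ge::GELib::Initialize(options);
    // Without an explicit entry, Initialize now publishes ge.fpCeilingMode = "2"
    // (kGlobalOptionFpCeilingModeDefault) into the global options.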


+0 -20  ge/offline/main.cc

@@ -32,7 +32,6 @@
#include "graph/anchor.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/graph.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/op_desc.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/type_utils.h"
@@ -64,8 +63,6 @@ using std::vector;

static bool is_dynamic_input = false;

// 310 limited 8G size
const char *const kGraphMemoryManagerMallocMaxSize = "8*1024*1024*1024";
const char *const kModeSupport = "only support 0(model to framework model), "
"1(framework model to json), 3(only pre-check), 5(pbtxt to json)";
const char *const kModelToJsonSupport = "only support 0(Caffe) 3(TensorFlow) 5(Onnx)";
@@ -908,13 +905,6 @@ domi::Status GenerateModel(std::map<string, string> &options, std::string output
return domi::FAILED;
}

geRet = ge::VarManager::Instance(0)->SetMemoryMallocSize(options);
if (geRet != ge::SUCCESS) {
GELOGE(ge::FAILED, "SetMemoryMallocSize failed.");
(void)ge::GELib::GetInstance()->Finalize();
return domi::FAILED;
}

ge::Graph graph;
std::vector<ge::GeTensor> inputs;
if (FLAGS_framework == domi::MINDSPORE) {
@@ -1016,7 +1006,6 @@ static void SetEnvForSingleOp(std::map<string, string> &options) {
options.emplace(ge::OP_SELECT_IMPL_MODE, FLAGS_op_select_implmode);
options.emplace(ge::OPTYPELIST_FOR_IMPLMODE, FLAGS_optypelist_for_implmode);
options.emplace(ge::AUTO_TUNE_MODE, FLAGS_auto_tune_mode);
options.emplace(ge::GRAPH_MEMORY_MAX_SIZE, kGraphMemoryManagerMallocMaxSize);
options.emplace(ge::OP_DEBUG_LEVEL, to_string(FLAGS_op_debug_level));
options.emplace(ge::DEBUG_DIR, FLAGS_debug_dir);
options.emplace(ge::OP_COMPILER_CACHE_DIR, FLAGS_op_compiler_cache_dir);
@@ -1053,13 +1042,6 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) {
return domi::FAILED;
}

ret = ge::VarManager::Instance(0)->SetMemoryMallocSize(options);
if (ret != ge::SUCCESS) {
GELOGE(ge::FAILED, "SetMemoryMallocSize failed.");
(void)ge::GELib::GetInstance()->Finalize();
return domi::FAILED;
}

vector<ge::SingleOpBuildParam> build_params;
if (ge::SingleOpParser::ParseSingleOpList(json_file_path, build_params) != ge::SUCCESS) {
DOMI_LOGE("parse single op json file failed");
@@ -1158,8 +1140,6 @@ domi::Status GenerateOmModel() {
(FLAGS_enable_compress_weight == "true") ?
ge::kEnableCompressWeightTrue : ge::kEnableCompressWeightFalse));

options.insert(std::pair<string, string>(string(ge::GRAPH_MEMORY_MAX_SIZE), kGraphMemoryManagerMallocMaxSize));

options.insert(std::pair<string, string>(string(ge::ENABLE_SINGLE_STREAM), FLAGS_enable_single_stream));

options.insert(std::pair<string, string>(string(ge::DEBUG_DIR), FLAGS_debug_dir));


+4 -0  ge/session/omg.cc

@@ -485,6 +485,10 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const
GELOGE(domi::FAILED, "Check out node (%s) fail.", user_out_nodes[i].first.c_str());
return domi::FAILED;
}

// add user_define_output_nodes attr.
(void)ge::AttrUtils::SetStr(op_desc, ATTR_ATC_USER_DEFINE_OUTPUT_NODES, "true");

if (i < output_formats.size()) {
if (output_formats[i] == domi::DOMI_TENSOR_NC1HWC0) {
GELOGI("The output node [%s] should be set NC1HWC0", user_out_nodes[i].first.c_str());


+1 -0  inc/external/ge/ge_api_types.h

@@ -339,6 +339,7 @@ const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT,
OUT_NODES,
INPUT_FP16_NODES,
LOG_LEVEL,
OP_DEBUG_LEVEL,
DEBUG_DIR,
OP_COMPILER_CACHE_DIR,
OP_COMPILER_CACHE_MODE};


+30 -14  inc/framework/common/debug/log.h

@@ -28,7 +28,7 @@
#if !defined(__ANDROID__) && !defined(ANDROID)
#define DOMI_LOGE(...) GE_LOG_ERROR(GE_MODULE_NAME, ge::FAILED, __VA_ARGS__)
#else
#include<android/log.h>
#include <android/log.h>
#if defined(BUILD_VERSION_PERF)
#define DOMI_LOGE(fmt, ...)
#else
@@ -83,12 +83,12 @@
} while (0);

// If expr is not GRAPH_SUCCESS, print the log and return FAILED
#define GE_CHK_GRAPH_STATUS_RET(expr, ...) \
do { \
if ((expr) != ge::GRAPH_SUCCESS) { \
DOMI_LOGE(__VA_ARGS__); \
return FAILED; \
} \
#define GE_CHK_GRAPH_STATUS_RET(expr, ...) \
do { \
if ((expr) != ge::GRAPH_SUCCESS) { \
DOMI_LOGE(__VA_ARGS__); \
return FAILED; \
} \
} while (0);

// If expr is not SUCCESS, print the log and execute a custom statement
@@ -99,13 +99,13 @@
} while (0);

// If expr is not true, print the log and return the specified status
#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \
do { \
bool b = (expr); \
if (!b) { \
GELOGE(_status, __VA_ARGS__); \
return _status; \
} \
#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \
do { \
bool b = (expr); \
if (!b) { \
GELOGE(_status, __VA_ARGS__); \
return _status; \
} \
} while (0);

// If expr is not true, print the log and return the specified status
@@ -253,4 +253,20 @@
exec_expr1; \
}

#define GE_ERRORLOG_AND_ERRORMSG(_status, errormsg) \
{ \
GELOGE(_status, "%s", errormsg); \
ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \
}

#define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \
do { \
bool b = (expr); \
if (!b) { \
GELOGE(_status, "%s", errormsg); \
ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); \
return _status; \
} \
} while (0)

#endif // INC_FRAMEWORK_COMMON_DEBUG_LOG_H_
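
Taken together, the two new macros fold the repeated GELOGE + ATCReportErrMessage("E10043", ...) pair into one line per call site. A usage sketch (the function and message are illustrative):

    Status CheckBatch(int64_t batch_count) {
      // On failure: logs via GELOGE, reports error code E10043, returns PARAM_INVALID.
      GE_CHK_LOG_AND_ERRORMSG(batch_count > 0, PARAM_INVALID, "Batch count must be positive");
      return SUCCESS;
    }

When the caller needs custom cleanup or a different return path, the non-returning GE_ERRORLOG_AND_ERRORMSG variant logs and reports the same way without the early return.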

+5 -4  inc/framework/common/types.h

@@ -70,6 +70,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFIL
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_STOP_VALUE;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::map<std::string, std::string> PROFILE_COMPONENT_MAP;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_CONFIG;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_MODEL_ID;

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASKS;
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR;
@@ -567,10 +568,10 @@ enum ModelCheckType {
/// @brief dynamic input type
///
enum DynamicInputType {
FIXED = 0, // default mode
DYNAMIC_BATCH = 1,
DYNAMIC_IMAGE = 2,
DYNAMIC_DIMS = 3
FIXED = 0, // default mode
DYNAMIC_BATCH = 1,
DYNAMIC_IMAGE = 2,
DYNAMIC_DIMS = 3
};

///


+12 -12  inc/framework/executor/ge_executor.h

@@ -38,14 +38,14 @@ class DynamicSingleOp;
struct RunModelData {
uint32_t index; // Data index
uint32_t modelId;
std::vector<DataBuffer> blobs; // All input/output data buffer
uint32_t timestamp; // Data creation time
uint32_t timeout; // Processing timeout
uint64_t request_id = 0; // Request ID
uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0
uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0
uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0
std::vector<uint64_t> dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty
std::vector<DataBuffer> blobs; // All input/output data buffer
uint32_t timestamp; // Data creation time
uint32_t timeout; // Processing timeout
uint64_t request_id = 0; // Request ID
uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0
uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0
uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0
std::vector<uint64_t> dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty
};

class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
@@ -264,14 +264,14 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
DynamicSingleOp **single_op);

static ge::Status ExecuteAsync(DynamicSingleOp *executor,
const std::vector<GeTensorDesc> &input_desc,
const std::vector<DataBuffer> &inputs,
std::vector<GeTensorDesc> &output_desc,
static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc,
const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc,
std::vector<DataBuffer> &outputs);

static ge::Status ReleaseSingleOpResource(void *stream);

static ge::Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id);

ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count);
ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);
ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims,


+1 -1  metadef

@@ -1 +1 @@
Subproject commit 0d0d2fb016d44f9a575ad8f8c2cb8858bba3acec
Subproject commit 37465b85d30b67a0edcc6ea4acd2f11a9697c7af

+1 -1  parser

@@ -1 +1 @@
Subproject commit 84ea76e94054fcfac5b80ded6e0ec4db1f37d3e0
Subproject commit 5fa1f3ed9b1785b9fd1623d624de91838dff615e
