@@ -133,6 +133,7 @@ set(EXECUTOR_SRC_LIST | |||
"graph/load/model_manager/task_info/event_record_task_info.cc" | |||
"graph/load/model_manager/task_info/event_wait_task_info.cc" | |||
"graph/load/model_manager/task_info/ffts_task_info.cc" | |||
"graph/load/model_manager/task_info/ffts_plus_task_info.cc" | |||
"graph/load/model_manager/task_info/fusion_start_task_info.cc" | |||
"graph/load/model_manager/task_info/fusion_stop_task_info.cc" | |||
#"graph/load/model_manager/task_info/hccl_task_info.cc" # Just for runner. | |||
@@ -120,6 +120,39 @@ class PluginManager { | |||
} | |||
return SUCCESS; | |||
} | |||
template <typename T1, typename T2> | |||
Status OptionalInvokeAll(const string &func_name, T1 arg1, T2 arg2) { | |||
for (const auto &handle : handles_) { | |||
// If the funcName is existed, signature of realFn can be casted to any type | |||
auto real_fn = (void (*)(T1, T2))mmDlsym(handle.second, const_cast<char *>(func_name.c_str())); | |||
if (real_fn == nullptr) { | |||
continue; | |||
} else { | |||
typename std::remove_reference<T1>::type arg1_temp; | |||
typename std::remove_reference<T2>::type arg2_temp; | |||
real_fn(arg1_temp, arg2_temp); | |||
CheckAndInsert(handle.first, func_name, arg1, arg1_temp); | |||
CheckAndInsert(handle.first, func_name, arg2, arg2_temp); | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
template <typename T> | |||
void CheckAndInsert(const std::string &so_name, const std::string &func_name, T arg, T arg_tmp) { | |||
if (std::is_same<typename std::remove_reference<T>::type, map<std::string, std::shared_ptr<DNNEngine>>>::value) { | |||
for (const auto &val : arg_tmp) { | |||
if (arg.find(val.first) != arg.end()) { | |||
GELOGW("FuncName %s in so %s find the same key: %s, will replace it", func_name.c_str(), | |||
so_name.c_str(), val.first.c_str()); | |||
arg[val.first] = val.second; | |||
} | |||
} | |||
} | |||
arg.insert(arg_tmp.begin(), arg_tmp.end()); | |||
} | |||
template <typename T1, typename T2> | |||
Status InvokeAll(const string &func_name, T1 arg) { | |||
for (const auto &handle : handles_) { | |||
@@ -17,20 +17,15 @@ | |||
#include "engine_manager/dnnengine_manager.h" | |||
#include <cstdio> | |||
#include <fstream> | |||
#include <map> | |||
#include <utility> | |||
#include "framework/common/debug/log.h" | |||
#include "common/ge/ge_util.h" | |||
#include "common/util/error_manager/error_manager.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "analyzer/analyzer.h" | |||
#include "graph/ge_context.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/utils/node_utils.h" | |||
#include "init/gelib.h" | |||
#include "framework/common/types.h" | |||
namespace { | |||
const char *const kSchedulerUnits = "schedule_units"; | |||
@@ -40,7 +35,7 @@ const char *const kExAttrs = "ex_attrs"; | |||
const char *const kIndependent = "independent"; | |||
const char *const kSkipAssignStream = "skip_assign_stream"; | |||
const char *const kCalEngines = "cal_engines"; | |||
const char *const kAttch = "attach"; | |||
const char *const kAttach = "attach"; | |||
const char *const kVectorCore = "VectorCore"; | |||
const char *const kVectorEngine = "VectorEngine"; | |||
const char *const kAIcoreEngine = "AIcoreEngine"; | |||
@@ -51,6 +46,10 @@ const char *const kHostCpuOpKernelLibName = "DNN_VM_HOST_CPU_OP_STORE"; | |||
namespace ge { | |||
namespace { | |||
const std::set<std::string> kNotCpuOp = {DATA, CONSTANT, CONSTANTOP, VARIABLE, NETOUTPUT}; | |||
const char *const kGetDNNEngineObjs = "GetDNNEngineObjs"; | |||
const char *const kGetCompoundEngineContains = "GetCompoundEngineContains"; | |||
const char *const kInvalidCompoundEngineName = "InvalidCompoundEngineName"; | |||
constexpr uint32_t kMaxRecursiveDepth = 10; | |||
bool ExecOnHostCpu(const OpDescPtr &op_desc) { | |||
bool is_host_cpu_op = (kNotCpuOp.find(op_desc->GetType()) == kNotCpuOp.end()); | |||
@@ -72,22 +71,21 @@ Status DNNEngineManager::Initialize(const std::map<std::string, std::string> &op | |||
} | |||
// Load engine so | |||
std::string so_path = "plugin/nnengine/"; | |||
std::string plugin_so_path = "plugin/nnengine/"; | |||
std::string path = PluginManager::GetPath(); | |||
path.append(so_path); | |||
std::string so_api_func = "GetDNNEngineObjs"; | |||
std::vector<std::string> so_func{so_api_func}; | |||
Status status = plugin_mgr_.Load(path, so_func); | |||
std::string engine_plugin_path = path + plugin_so_path; | |||
std::vector<std::string> so_func{kGetDNNEngineObjs}; | |||
Status status = plugin_mgr_.Load(engine_plugin_path, so_func); | |||
if (status != SUCCESS) { | |||
GELOGE(status, "[Load][EngineSo]Failed, lib path %s", path.c_str()); | |||
REPORT_CALL_ERROR("E19999", "Load engine so failed, lib path %s", path.c_str()); | |||
REPORT_CALL_ERROR("E19999", "Load engine so failed, lib path %s", engine_plugin_path.c_str()); | |||
return status; | |||
} | |||
status = plugin_mgr_.InvokeAll<std::map<std::string, DNNEnginePtr> &>(so_api_func, engines_map_); | |||
status = plugin_mgr_.InvokeAll<std::map<std::string, DNNEnginePtr> &>(kGetDNNEngineObjs, engines_map_); | |||
if (status != SUCCESS) { | |||
GELOGE(status, "[Get][DNNEngineObjs]Failed, so_api_func %s", so_api_func.c_str()); | |||
REPORT_CALL_ERROR("E19999", "Get DNNEngineObjs failed, so_api_func %s", so_api_func.c_str()); | |||
GELOGE(status, "[Get][DNNEngineObjs]Failed, so_api_func %s", kGetDNNEngineObjs); | |||
REPORT_CALL_ERROR("E19999", "Get DNNEngineObjs failed, so_api_func %s", kGetDNNEngineObjs); | |||
return status; | |||
} | |||
@@ -117,8 +115,8 @@ Status DNNEngineManager::Initialize(const std::map<std::string, std::string> &op | |||
if ((attrs.mem_type.size()) != 1 || (attrs.mem_type[0] != GE_ENGINE_ATTR_MEM_TYPE_HBM)) { | |||
GELOGE(GE_ENG_MEMTYPE_ERROR, "[Check][Param]Engine %s in aicore, but the memory type is " | |||
"not HBM, mem_type_size %lu", (iter->first).c_str(), attrs.mem_type.size()); | |||
REPORT_INNER_ERROR("E19999", "Engine %s in aicore, but the memory type is not HBM, " | |||
"mem_type_size %lu", (iter->first).c_str(), attrs.mem_type.size()); | |||
REPORT_INNER_ERROR("E19999", "Engine %s in aicore, but the memory type is not HBM, mem_type_size %lu", | |||
(iter->first).c_str(), attrs.mem_type.size()); | |||
return GE_ENG_MEMTYPE_ERROR; | |||
} | |||
} | |||
@@ -161,6 +159,7 @@ Status DNNEngineManager::Finalize() { | |||
} | |||
init_flag_ = false; | |||
engines_map_.clear(); | |||
atomic_2_compound_.clear(); | |||
return SUCCESS; | |||
} | |||
@@ -183,7 +182,7 @@ bool DNNEngineManager::IsEngineRegistered(const std::string &name) { | |||
return false; | |||
} | |||
void DNNEngineManager::InitPerformanceStaistic() { | |||
void DNNEngineManager::InitPerformanceStatistic() { | |||
std::lock_guard<std::mutex> lock(mutex_); | |||
checksupport_cost_.clear(); | |||
} | |||
@@ -221,43 +220,42 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { | |||
std::string exclude_core_Type = (ge_core_type == kVectorCore) ? kAIcoreEngine : kVectorEngine; | |||
GELOGD("engine type will exclude: %s", exclude_core_Type.c_str()); | |||
auto root_graph = ge::GraphUtils::FindRootGraph(node_ptr->GetOwnerComputeGraph()); | |||
std::map<std::string, std::string> unsupported_reasons; | |||
for (const auto &it : op_infos) { | |||
if (it.engine == exclude_core_Type) { | |||
continue; | |||
} | |||
auto &kernel_map = ops_kernel_manager.GetAllOpsKernelInfoStores(); | |||
auto &kernel_name = it.opKernelLib; | |||
auto kernel_info_store = kernel_map.find(kernel_name); | |||
if (kernel_info_store != kernel_map.end()) { | |||
std::string unsupported_reason; | |||
// It will be replaced by engine' checksupport | |||
uint64_t start_time = GetCurrentTimestamp(); | |||
if (kernel_info_store->second->CheckSupported(node_ptr, unsupported_reason)) { | |||
checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time; | |||
op_desc->SetOpEngineName(it.engine); | |||
op_desc->SetOpKernelLibName(kernel_name); | |||
// set attrs for taking information when load txt to graph object | |||
(void) AttrUtils::SetStr(op_desc, ATTR_NAME_ENGINE_NAME_FOR_LX, it.engine); | |||
(void) AttrUtils::SetStr(op_desc, ATTR_NAME_KKERNEL_LIB_NAME_FOR_LX, kernel_name); | |||
GELOGD("DNNEngineManager:Set OpKernelLibName %s and engine name %s to op_desc %s", kernel_name.c_str(), | |||
it.engine.c_str(), op_desc->GetName().c_str()); | |||
return it.engine; | |||
} else { | |||
checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time; | |||
unsupported_reasons.emplace(kernel_name, unsupported_reason); | |||
GELOGI("DNNEngineManager:Check support failed, kernel_name is %s, op type is %s, op name is %s", | |||
kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); | |||
if (!op_desc->HasAttr("_is_ge_op")) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("W11001", {"opname"}, {op_desc->GetName()}); | |||
} | |||
const auto &kernel_name = it.opKernelLib; | |||
auto kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(kernel_name); | |||
if (kernel_info_store == nullptr) { | |||
GELOGW("DNNEngineManager:Can not find any supported ops kernel info store by kernel_name %s, op type is %s, " | |||
"op name is %s", kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); | |||
} | |||
std::string unsupported_reason; | |||
// It will be replaced by engine's check support | |||
uint64_t start_time = GetCurrentTimestamp(); | |||
if (kernel_info_store->CheckSupported(node_ptr, unsupported_reason)) { | |||
checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time; | |||
op_desc->SetOpEngineName(it.engine); | |||
op_desc->SetOpKernelLibName(kernel_name); | |||
// set attrs for taking information when load txt to graph object | |||
if (it.flagAsync) { | |||
GELOGD("Set aicpu blocking op:%s attribute(is_blocking_op):true", op_desc->GetName().c_str()); | |||
(void)AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||
} | |||
(void) AttrUtils::SetStr(op_desc, ATTR_NAME_ENGINE_NAME_FOR_LX, it.engine); | |||
(void) AttrUtils::SetStr(op_desc, ATTR_NAME_KKERNEL_LIB_NAME_FOR_LX, kernel_name); | |||
GELOGD("DNNEngineManager:Set kernel_lib %s, atomic engine %s, to node %s", kernel_name.c_str(), it.engine.c_str(), | |||
op_desc->GetName().c_str()); | |||
return it.engine; | |||
} else { | |||
GELOGW( | |||
"DNNEngineManager:Can not find any supported ops kernel info store by kernel_name %s," | |||
"op type is %s, op name is %s", | |||
kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); | |||
checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time; | |||
unsupported_reasons.emplace(kernel_name, unsupported_reason); | |||
GELOGI("DNNEngineManager:Check support failed, kernel_name is %s, op type is %s, op name is %s", | |||
kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); | |||
if (!op_desc->HasAttr("_is_ge_op")) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("W11001", {"opname"}, {op_desc->GetName()}); | |||
} | |||
} | |||
} | |||
@@ -272,6 +270,7 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { | |||
op_desc->GetType().c_str(), it.first.c_str(), it.second.c_str()); | |||
} | |||
auto root_graph = ge::GraphUtils::FindRootGraph(node_ptr->GetOwnerComputeGraph()); | |||
analyzer::DataInfo analyze_info{root_graph->GetSessionID(), root_graph->GetGraphID(), | |||
analyzer::CHECKSUPPORT, node_ptr, reason}; | |||
// do not change original process | |||
@@ -285,6 +284,157 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { | |||
return ""; | |||
} | |||
std::string DNNEngineManager::GetCompoundEngineName(const ge::NodePtr &node_ptr, uint32_t recursive_depth) { | |||
if ((node_ptr == nullptr) || (node_ptr->GetOpDesc() == nullptr)) { | |||
return ""; | |||
} | |||
const auto &op_desc = node_ptr->GetOpDesc(); | |||
if (recursive_depth > kMaxRecursiveDepth) { | |||
REPORT_INNER_ERROR("E19999", "Get CompoundEngineName will be terminated because too many nesting levels(%d) of " | |||
"subgraphs, last node is %s", recursive_depth, op_desc->GetName().c_str()); | |||
GELOGE(PARAM_INVALID, | |||
"[Check][Param] Get CompoundEngineName will be terminated because too many nesting levels(%d) of subgraphs, " | |||
"last node is %s", recursive_depth, op_desc->GetName().c_str()); | |||
return ""; | |||
} | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Get][CompoundEngineName]Failed, gelib not init before"); | |||
REPORT_INNER_ERROR("E19999", "Get CompoundEngineName failed, gelib not init before"); | |||
return ""; | |||
} | |||
if (instance_ptr->OpsKernelManagerObj().GetCompoundEngineContains().empty() || | |||
instance_ptr->OpsKernelManagerObj().GetCompoundEngineKernelLibName().empty()) { | |||
return ""; | |||
} | |||
// compound engine name exist | |||
std::string compound_engine_name; | |||
(void)AttrUtils::GetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_NAME, compound_engine_name); | |||
std::string compound_engine_kernel_lib_name; | |||
(void)AttrUtils::GetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME, compound_engine_kernel_lib_name); | |||
if (!(compound_engine_name.empty() || compound_engine_kernel_lib_name.empty())) { | |||
return compound_engine_name; | |||
} | |||
// normal node without subgraph | |||
if (op_desc->GetSubgraphInstanceNames().empty()) { | |||
auto atomic_engine_name = op_desc->GetOpEngineName(); | |||
if (atomic_engine_name.empty()) { | |||
atomic_engine_name = GetDNNEngineName(node_ptr); | |||
} | |||
compound_engine_name = GetOwningCompoundEngine(atomic_engine_name); | |||
compound_engine_kernel_lib_name = GetCompoundEngineKernelLibName(compound_engine_name); | |||
if (compound_engine_name.empty() || compound_engine_kernel_lib_name.empty()) { | |||
(void)op_desc->DelAttr(ATTR_NAME_COMPOUND_ENGINE_NAME); | |||
(void)op_desc->DelAttr(ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME); | |||
} else { | |||
GELOGI("Assign compound engine %s, kernel lib name %s for node %s.", | |||
compound_engine_name.c_str(), compound_engine_kernel_lib_name.c_str(), op_desc->GetName().c_str()); | |||
(void)AttrUtils::SetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_NAME, compound_engine_name); | |||
(void)AttrUtils::SetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME, compound_engine_kernel_lib_name); | |||
} | |||
return compound_engine_name; | |||
} | |||
bool graph_diff_compound_engine_flag = false; | |||
std::string graph_compound_engine_name = kInvalidCompoundEngineName; | |||
std::vector<ComputeGraphPtr> subgraphs; | |||
if (NodeUtils::GetSubgraphs(node_ptr, subgraphs) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Get subgraphs of node %s failed", op_desc->GetName().c_str()); | |||
GELOGE(FAILED, "[Check][Param] Get subgraphs of node %s failed", op_desc->GetName().c_str()); | |||
return ""; | |||
} | |||
for (const auto &subgraph : subgraphs) { | |||
std::string cur_graph_compound_engine_name; | |||
// if subgraph has been assigned | |||
if (subgraph->HasAttr(ATTR_NAME_COMPOUND_ENGINE_NAME)) { | |||
(void)AttrUtils::GetStr(subgraph, ATTR_NAME_COMPOUND_ENGINE_NAME, cur_graph_compound_engine_name); | |||
} else { | |||
bool node_diff_compound_engine_flag = false; | |||
std::string node_compound_engine_name = kInvalidCompoundEngineName; | |||
uint32_t assign_node_num = 0; | |||
for (const auto &cur_node : subgraph->GetDirectNode()) { | |||
if (IsStreamAssignSkip(cur_node) && cur_node->GetOpDesc()->GetSubgraphInstanceNames().empty()) { | |||
continue; | |||
} | |||
assign_node_num++; | |||
std::string cur_node_compound_engine_name = GetCompoundEngineName(cur_node, recursive_depth + 1); | |||
if (node_compound_engine_name == kInvalidCompoundEngineName) { | |||
node_compound_engine_name = cur_node_compound_engine_name; | |||
} else if (node_compound_engine_name != cur_node_compound_engine_name) { | |||
node_diff_compound_engine_flag = true; | |||
break; | |||
} | |||
} | |||
if (assign_node_num == 0) { | |||
GELOGD("all nodes in subgraph %s belongs to ge_local engine", subgraph->GetName().c_str()); | |||
continue; | |||
} | |||
if (!(node_diff_compound_engine_flag || | |||
(node_compound_engine_name == kInvalidCompoundEngineName) || | |||
node_compound_engine_name.empty())) { | |||
GELOGI("Assign compound engine %s for subgraph %s.", node_compound_engine_name.c_str(), subgraph->GetName().c_str()); | |||
(void)AttrUtils::SetStr(subgraph, ATTR_NAME_COMPOUND_ENGINE_NAME, node_compound_engine_name); | |||
cur_graph_compound_engine_name = node_compound_engine_name; | |||
} else { | |||
(void)subgraph->DelAttr(ATTR_NAME_COMPOUND_ENGINE_NAME); | |||
cur_graph_compound_engine_name.clear(); | |||
} | |||
} | |||
if (graph_compound_engine_name == kInvalidCompoundEngineName) { | |||
graph_compound_engine_name = cur_graph_compound_engine_name; | |||
} else if (graph_compound_engine_name != cur_graph_compound_engine_name) { | |||
graph_diff_compound_engine_flag = true; | |||
break; | |||
} | |||
} | |||
compound_engine_kernel_lib_name = GetCompoundEngineKernelLibName(graph_compound_engine_name); | |||
if (!(graph_diff_compound_engine_flag || (graph_compound_engine_name == kInvalidCompoundEngineName) || | |||
graph_compound_engine_name.empty() || compound_engine_kernel_lib_name.empty())) { | |||
compound_engine_name = graph_compound_engine_name; | |||
GELOGI("Assign compound engine %s, kernel lib name %s for node %s.", | |||
compound_engine_name.c_str(), compound_engine_kernel_lib_name.c_str(), op_desc->GetName().c_str()); | |||
(void)AttrUtils::SetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_NAME, compound_engine_name); | |||
(void)AttrUtils::SetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME, compound_engine_kernel_lib_name); | |||
} else { | |||
(void)op_desc->DelAttr(ATTR_NAME_COMPOUND_ENGINE_NAME); | |||
(void)op_desc->DelAttr(ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME); | |||
} | |||
return compound_engine_name; | |||
} | |||
std::string DNNEngineManager::GetOwningCompoundEngine(const string &atomic_engine_name) { | |||
if (atomic_2_compound_.empty()) { | |||
InitAtomicCompoundMapping(); | |||
} | |||
const auto &iter = atomic_2_compound_.find(atomic_engine_name); | |||
if (iter == atomic_2_compound_.end()) { | |||
GELOGW("Compound engine which contains atomic engine %s is not registered", atomic_engine_name.c_str()); | |||
return ""; | |||
} | |||
return iter->second; | |||
} | |||
std::string DNNEngineManager::GetCompoundEngineKernelLibName(const string &compound_engine_name) const { | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
GELOGW("[Get][CompoundEngineKernelLibName]Failed, gelib not init before"); | |||
return ""; | |||
} | |||
const auto &compound_engine_2_kernel_lib_name = instance_ptr->OpsKernelManagerObj().GetCompoundEngineKernelLibName(); | |||
const auto &iter = compound_engine_2_kernel_lib_name.find(compound_engine_name); | |||
if (iter == compound_engine_2_kernel_lib_name.end()) { | |||
GELOGW("Kernel lib name of compound engine %s is not registered", compound_engine_name.c_str()); | |||
return ""; | |||
} | |||
return iter->second; | |||
} | |||
std::string DNNEngineManager::GetHostCpuEngineName(const std::vector<OpInfo> &op_infos, | |||
const OpDescPtr &op_desc) const { | |||
for (const auto &it : op_infos) { | |||
@@ -418,8 +568,8 @@ Status DNNEngineManager::ParserEngineMessage(const json engines_json, const std: | |||
engine_conf_ptr->independent = engines_elems[kIndependent]; | |||
} | |||
if (engines_elems.find(kAttch) != engines_elems.end()) { | |||
engine_conf_ptr->attach = engines_elems[kAttch]; | |||
if (engines_elems.find(kAttach) != engines_elems.end()) { | |||
engine_conf_ptr->attach = engines_elems[kAttach]; | |||
} | |||
if (engines_elems.find(kSkipAssignStream) != engines_elems.end()) { | |||
@@ -500,6 +650,9 @@ Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle h | |||
Status DNNEngineManager::CheckJsonFile() { | |||
GELOGD("Begin to check json file"); | |||
for (auto &it : engines_map_) { | |||
if (!it.second->IsAtomic()) { | |||
continue; | |||
} | |||
std::string engine_name = it.first; | |||
int count = 0; | |||
for (auto &iter : schedulers_) { | |||
@@ -527,4 +680,69 @@ Status DNNEngineManager::CheckJsonFile() { | |||
GELOGD("Check json file success"); | |||
return SUCCESS; | |||
} | |||
void DNNEngineManager::InitAtomicCompoundMapping() { | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Get][CompoundEngineName]Failed, gelib not init before"); | |||
REPORT_INNER_ERROR("E19999", "Get CompoundEngineName failed, gelib not init before"); | |||
return; | |||
} | |||
const auto &compound_engine_2_kernel_lib_name = instance_ptr->OpsKernelManagerObj().GetCompoundEngineKernelLibName(); | |||
for (const auto &item : instance_ptr->OpsKernelManagerObj().GetCompoundEngineContains()) { | |||
const auto &compound_engine = GetEngine(item.first); | |||
if ((compound_engine == nullptr) || compound_engine->IsAtomic()) { | |||
GELOGW("Compound engine %s is not registered", item.first.c_str()); | |||
} | |||
const auto &iter = compound_engine_2_kernel_lib_name.find(item.first); | |||
if ((iter == compound_engine_2_kernel_lib_name.end()) || iter->second.empty()) { | |||
GELOGW("Kernel lib name of compound engine %s is empty", item.first.c_str()); | |||
} | |||
for (const auto &atomic_engine_name : item.second) { | |||
const auto &atomic_engine = GetEngine(atomic_engine_name); | |||
if ((atomic_engine == nullptr) || !atomic_engine->IsAtomic()) { | |||
GELOGW("Atomic engine %s is not registered", atomic_engine_name.c_str()); | |||
continue; | |||
} | |||
auto iter = atomic_2_compound_.find(atomic_engine_name); | |||
if (iter != atomic_2_compound_.end()) { | |||
GELOGW("Atomic engine %s has been contained in compound engine %s, and will be overwritten by engine %s", | |||
atomic_engine_name.c_str(), iter->second.c_str(), item.first.c_str()); | |||
} | |||
atomic_2_compound_[atomic_engine_name] = item.first; | |||
} | |||
} | |||
} | |||
bool DNNEngineManager::IsStreamAssignSkip(const NodePtr &node) { | |||
const auto &op_desc = node->GetOpDesc(); | |||
if (op_desc == nullptr) { | |||
return false; | |||
} | |||
std::string engine_name = op_desc->GetOpEngineName(); | |||
if (engine_name.empty()) { | |||
engine_name = GetDNNEngineName(node); | |||
} | |||
return IsStreamAssignSkip(engine_name); | |||
} | |||
bool DNNEngineManager::IsStreamAssignSkip(const string &engine_name) { | |||
// Only one scheduler has been supported by now | |||
for (const auto &scheduler : schedulers_) { | |||
const map<string, EngineConfPtr> cal_engines = scheduler.second.cal_engines; | |||
auto cal_engines_iter = cal_engines.find(engine_name); | |||
if (cal_engines_iter == cal_engines.end()) { | |||
GELOGW("No cal_engines found within engine %s", engine_name.c_str()); | |||
continue; | |||
} | |||
EngineConfPtr engine_conf_ptr = cal_engines_iter->second; | |||
if (engine_conf_ptr == nullptr) { | |||
GELOGW("engine_conf_ptr within engine %s is null", engine_name.c_str()); | |||
continue; | |||
} | |||
return engine_conf_ptr->skip_assign_stream; | |||
} | |||
return false; | |||
} | |||
} // namespace ge |
@@ -61,12 +61,18 @@ class DNNEngineManager { | |||
public: | |||
friend class GELib; | |||
std::shared_ptr<ge::DNNEngine> GetEngine(const std::string &name) const; | |||
const std::map<std::string, DNNEnginePtr> &GetAllEngines() const { return engines_map_; } | |||
bool IsEngineRegistered(const std::string &name); | |||
// If can't find appropriate engine name, return "", report error | |||
string GetDNNEngineName(const ge::NodePtr &node_ptr); | |||
string GetCompoundEngineName(const ge::NodePtr &node_ptr, uint32_t recursive_depth = 1); | |||
string GetOwningCompoundEngine(const string &atomic_engine_name); | |||
string GetCompoundEngineKernelLibName(const string &compound_engine_name) const; | |||
const map<string, SchedulerConf> &GetSchedulers() const; | |||
const map<string, uint64_t> &GetCheckSupportCost() const; | |||
void InitPerformanceStaistic(); | |||
void InitPerformanceStatistic(); | |||
bool IsStreamAssignSkip(const NodePtr &node); | |||
bool IsStreamAssignSkip(const string &engine_name); | |||
private: | |||
DNNEngineManager(); | |||
@@ -79,11 +85,15 @@ class DNNEngineManager { | |||
map<string, EngineConfPtr> &engines); | |||
Status CheckJsonFile(); | |||
std::string GetHostCpuEngineName(const std::vector<OpInfo> &op_infos, const OpDescPtr &op_desc) const; | |||
void InitAtomicCompoundMapping(); | |||
PluginManager plugin_mgr_; | |||
std::map<std::string, DNNEnginePtr> engines_map_; | |||
std::map<std::string, ge::DNNEngineAttribute> engines_attrs_map_; | |||
std::map<string, SchedulerConf> schedulers_; | |||
std::map<string, uint64_t> checksupport_cost_; | |||
// {atomic_engine, compound_engine} | |||
std::map<std::string, std::string> atomic_2_compound_{}; | |||
bool init_flag_; | |||
mutable std::mutex mutex_; | |||
}; | |||
@@ -72,7 +72,7 @@ bool LabelGotoTask::Distribute() { | |||
return false; | |||
} | |||
rt_ret = rtLabelListCpy(reinterpret_cast<void**>(label_list.data()), label_list.size(), label_info_, label_info_size); | |||
rt_ret = rtLabelListCpy(const_cast<void**>(label_list.data()), label_list.size(), label_info_, label_info_size); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
return false; | |||
@@ -1158,7 +1158,6 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> | |||
if (ret != SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "build graph failed, graph id:%u, ret:%d", graph_id, ret); | |||
GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "[Build][Graph] fail, graph id: %u", graph_id); | |||
ret = GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; | |||
} | |||
RtContextUtil::GetInstance().DestroyRtContexts(session_id); | |||
@@ -18,7 +18,6 @@ | |||
#include "framework/common/types.h" | |||
#include "framework/common/util.h" | |||
#include "framework/common/ge_inner_error_codes.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/label/label_maker.h" | |||
@@ -85,8 +84,9 @@ bool LabelAllocator::CollectFunctionalNode(ComputeGraphPtr &graph, std::set<Node | |||
return false; | |||
} | |||
if (func_node->GetOpDesc() != nullptr && func_node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH)) { | |||
GELOGD("Graph[%s] is ffts subgraph, skip label allocator.", graph->GetName().c_str()); | |||
if (func_node->GetOpDesc() != nullptr && (func_node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH) || | |||
func_node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_PLUS_SUB_GRAPH))) { | |||
GELOGD("Graph[%s] is ffts/ffts+ subgraph, skip label allocator.", graph->GetName().c_str()); | |||
return true; | |||
} | |||
@@ -275,7 +275,7 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<uint64_t, siz | |||
"E19022", std::vector<std::string>({"size", "item", "maxsize"}), | |||
std::vector<std::string>({std::to_string(total_mem_offset), "featuremap", | |||
std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())})); | |||
return ge::FAILED; | |||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -29,9 +29,10 @@ Status MemoryAssigner::AssignMemory(bool is_loop_graph, map<uint64_t, size_t> &m | |||
} | |||
// Reassign memory for special nodes | |||
if (graph_mem_assigner.ReAssignMemory(is_loop_graph, mem_offset) != ge::SUCCESS) { | |||
Status ret = graph_mem_assigner.ReAssignMemory(is_loop_graph, mem_offset); | |||
if (ret != ge::SUCCESS) { | |||
GELOGE(ge::FAILED, "[ReAssign][Memory] failed, graph:%s", compute_graph_->GetName().c_str()); | |||
return ge::FAILED; | |||
return ret; | |||
} | |||
// Assign memory (block and offset) for zero copy nodes | |||
@@ -17,18 +17,12 @@ | |||
#include "graph/build/stream_allocator.h" | |||
#include <algorithm> | |||
#include <memory> | |||
#include "common/ge/ge_util.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "framework/common/fmk_error_codes.h" | |||
#include "framework/common/types.h" | |||
#include "graph/build/logical_stream_allocator.h" | |||
#include "common/omg_util.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/ge_context.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "init/gelib.h" | |||
#include "framework/common/string_util.h" | |||
#include "common/util/error_manager/error_manager.h" | |||
using std::map; | |||
using std::set; | |||
@@ -433,7 +427,8 @@ Status StreamAllocator::SetActiveStreamsForSubgraphs() { | |||
// Insert the send/recv event id to the graph | |||
Status StreamAllocator::InsertSyncEvents() { | |||
auto ffts_filter = [](const Node &node, const char *, const ComputeGraphPtr &) { | |||
return !node.GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH); | |||
return !(node.GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH) || | |||
node.GetOpDesc()->HasAttr(ATTR_NAME_FFTS_PLUS_SUB_GRAPH)); | |||
}; | |||
for (const auto &cur_node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag(), nullptr, ffts_filter)) { | |||
@@ -536,7 +531,9 @@ Status StreamAllocator::InsertEventsForSubgraph() { | |||
for (const auto &subgraph : whole_graph_->GetAllSubgraphs()) { | |||
GE_CHECK_NOTNULL(subgraph); | |||
const auto parent_node = subgraph->GetParentNode(); | |||
if (parent_node != nullptr && parent_node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH)) { | |||
if (parent_node != nullptr && (parent_node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH) || | |||
parent_node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_PLUS_SUB_GRAPH) || | |||
parent_node->GetOpDesc()->HasAttr(ATTR_NAME_THREAD_SCOPE_ID))) { | |||
GELOGD("Skip ffts subgraph, parent node is %s.", parent_node->GetName().c_str()); | |||
continue; | |||
} | |||
@@ -356,7 +356,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||
GE_MAKE_GUARD(release, callback); | |||
auto ffts_filter = [](const Node &node, const char *, const ComputeGraphPtr &) { | |||
return !node.GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH); | |||
return !(node.GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH) || | |||
node.GetOpDesc()->HasAttr(ATTR_NAME_FFTS_PLUS_SUB_GRAPH)); | |||
}; | |||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag(), nullptr, ffts_filter)) { | |||
OpDescPtr op_desc = node->GetOpDesc(); | |||
@@ -371,7 +372,6 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||
continue); | |||
GE_CHK_STATUS_RET(UpdateOpIsVarAttr(op_desc, graph->GetSessionID())); | |||
string op_kernel_lib_name = op_desc->GetOpKernelLibName(); | |||
// For fusion ddb pass, task def must be continuous. | |||
// Part2: Call | |||
auto fusion_task_info = | |||
@@ -384,13 +384,15 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||
GELOGI("Fusion node[name:%s, type:%s] do not need generate task again.", name.c_str(), type.c_str()); | |||
continue; | |||
} | |||
string op_kernel_lib_name = op_desc->GetOpKernelLibName(); | |||
GE_CHK_BOOL_EXEC_INFO(!op_kernel_lib_name.empty(), continue, | |||
"Node[name:%s, type:%s] does not need to generate task.", name.c_str(), type.c_str()); | |||
auto kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(op_kernel_lib_name); | |||
GE_CHECK_NOTNULL(kernel_info_store); | |||
GE_CHK_STATUS_RET(UpdateAnchorStatus(node), "[Call][UpdateAnchorStatus] node:%s(%s) failed", name.c_str(), | |||
type.c_str()); | |||
if (node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH)) { | |||
if (node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH) || | |||
node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_PLUS_SUB_GRAPH)) { | |||
GE_CHK_STATUS_RET(UpdateAnchorStatusForFfts(node), "[Call][UpdateAnchorStatusForFfts] node:%s(%s) failed", | |||
name.c_str(), type.c_str()); | |||
} | |||
@@ -409,7 +411,30 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||
GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), | |||
name.c_str(), type.c_str(), op_id, stream_id); | |||
GE_TIMESTAMP_RESTART(GenerateTask); | |||
auto ret = OpsKernelBuilderManager::Instance().GenerateTask(*node, run_context, task_def_list); | |||
auto ret = SUCCESS; | |||
if (op_desc->HasAttr(ATTR_NAME_FFTS_PLUS_SUB_GRAPH)) { | |||
std::vector<ComputeGraphPtr> subgraphs; | |||
if (NodeUtils::GetSubgraphs(node, subgraphs) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Get subgraphs of node %s failed", op_desc->GetName().c_str()); | |||
GELOGE(FAILED, "[Check][Param] Get subgraphs of node %s failed", op_desc->GetName().c_str()); | |||
return FAILED; | |||
} | |||
for (const auto &subgraph : subgraphs) { | |||
for (const auto &tmp_node : subgraph->GetAllNodes()) { | |||
ret = OpsKernelBuilderManager::Instance().GenerateTask(*tmp_node, run_context, task_def_list); | |||
if (ret != SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Call OpsKernelBuilderManager GenerateTask fail for op:%s(%s)", | |||
tmp_node->GetName().c_str(), tmp_node->GetType().c_str()); | |||
GELOGE(ret, "[Generate][Task] fail for op:%s(%s)", tmp_node->GetName().c_str(), | |||
tmp_node->GetType().c_str()); | |||
return ret; | |||
} | |||
} | |||
} | |||
ret = OpsKernelBuilderManager::Instance().GenerateTask(*node, run_context, task_def_list, false); | |||
} else { | |||
ret = OpsKernelBuilderManager::Instance().GenerateTask(*node, run_context, task_def_list); | |||
} | |||
GE_TIMESTAMP_ADD(GenerateTask); | |||
if (ret != SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Call OpsKernelBuilderManager GenerateTask fail for op:%s(%s)", | |||
@@ -100,9 +100,6 @@ const uint32_t kEndOfSequenceNew = 507005; | |||
const int32_t kModelAbortNormal = 0x0704000e; | |||
const int32_t kModelAbortNormalNew = 507024; | |||
const uint32_t kInteval = 2; | |||
const uint32_t kFftsTbeHandleElementSize = 2; | |||
const uint32_t kNonTailBlock = 0; | |||
const uint32_t kTailBlock = 1; | |||
const char *const kModelName = "model_name"; | |||
const char *const kModeleId = "model_id"; | |||
const char *const kLoadStartTime = "load_start_time"; | |||
@@ -238,6 +235,12 @@ DavinciModel::~DavinciModel() { | |||
GE_LOGW_IF(rtEventDestroy(event_list_[i]) != RT_ERROR_NONE, "Destroy event failed, index: %zu", i); | |||
} | |||
for (const auto &it : stream_2_event_) { | |||
if (rtEventDestroy(it.second) != RT_ERROR_NONE) { | |||
GELOGW("Destroy event failed"); | |||
} | |||
} | |||
FreeWeightsMem(); | |||
FreeFeatureMapMem(); | |||
@@ -3736,33 +3739,32 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { | |||
Status DavinciModel::InitTbeHandleWithFfts(const OpDescPtr &op_desc) { | |||
std::vector<OpKernelBinPtr> tbe_kernel; | |||
tbe_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_NAME_THREAD_TBE_KERNEL, tbe_kernel); | |||
GELOGD("Kernel bin ptr vec size is %zu.", tbe_kernel.size()); | |||
if (tbe_kernel.size() != kFftsTbeHandleElementSize) { | |||
REPORT_INNER_ERROR("E19999", "Get tbe_kernel for op:%s(%s) fail, model_id:%u", | |||
op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] TBE: %s can't find tvm bin file, size is %zu when ffts", | |||
op_desc->GetName().c_str(), tbe_kernel.size()); | |||
std::vector<string> bin_file_keys; | |||
(void)AttrUtils::GetListStr(op_desc, kStubFuncName, bin_file_keys); | |||
if (tbe_kernel.size() != bin_file_keys.size()) { | |||
REPORT_INNER_ERROR("E19999", "[%s] number of bin_file != number of file_name, bin_file_num=%zu, file_name_num=%zu", | |||
op_desc->GetName().c_str(), tbe_kernel.size(), bin_file_keys.size()); | |||
GELOGE(INTERNAL_ERROR, | |||
"[Check][Param] [%s] number of bin_file != number of file_name, bin_file_num=%zu, file_name_num=%zu", | |||
op_desc->GetName().c_str(), tbe_kernel.size(), bin_file_keys.size()); | |||
return INTERNAL_ERROR; | |||
} | |||
if (tbe_kernel[0] == nullptr || tbe_kernel[1] == nullptr) { | |||
REPORT_INNER_ERROR("E19999", "Tbe kernel for op:%s is nullptr.", op_desc->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] TBE: tvm bin file of %s is nullptr when ffts.", op_desc->GetName().c_str()); | |||
if (tbe_kernel.empty()) { | |||
REPORT_INNER_ERROR("E19999", "[%s] tbe kernel is empty", op_desc->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] [%s] tbe kernel is empty", op_desc->GetName().c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
vector<string> bin_file_keys; | |||
(void)AttrUtils::GetListStr(op_desc, kStubFuncName, bin_file_keys); | |||
if (bin_file_keys.size() != kFftsTbeHandleElementSize) { | |||
REPORT_INNER_ERROR("E19999", "Get bin_file for op:%s(%s) fail.", op_desc->GetName().c_str(), | |||
op_desc->GetType().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] TBE: %s can't find bin file keys, size is %zu when ffts", | |||
op_desc->GetName().c_str(), bin_file_keys.size()); | |||
return INTERNAL_ERROR; | |||
size_t num = tbe_kernel.size(); | |||
GELOGD("Kernel bin num is %zu", num); | |||
for (size_t i = 0; i < num; i++) { | |||
if (tbe_kernel[i] == nullptr) { | |||
REPORT_INNER_ERROR("E19999", "Tbe kernel for op:%s is nullptr.", op_desc->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] TBE: tvm bin file of %s is nullptr when ffts.", op_desc->GetName().c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
GE_CHK_STATUS_RET(FunctionRegister(op_desc, bin_file_keys[i], tbe_kernel[i], true, i), | |||
"Function register of No. %zu bin file %s failed.", i, bin_file_keys[i].c_str()); | |||
} | |||
GE_CHK_STATUS_RET(FunctionRegister(op_desc, bin_file_keys[kNonTailBlock], tbe_kernel[kNonTailBlock], true, | |||
kNonTailBlock), | |||
"Function register of first bin file %s failed.", bin_file_keys[kNonTailBlock].c_str()); | |||
GE_CHK_STATUS_RET(FunctionRegister(op_desc, bin_file_keys[kTailBlock], tbe_kernel[kTailBlock], true, kTailBlock), | |||
"Function register of second bin file %s failed.", bin_file_keys[kTailBlock].c_str()); | |||
return SUCCESS; | |||
} | |||
@@ -3809,6 +3811,10 @@ Status DavinciModel::FunctionRegister(const OpDescPtr &op_desc, string &bin_file | |||
GE_CHK_STATUS_RET(InitKernelName(op_desc, is_ffts, thread_index, kernel_name), "Init kernel name of %s failed.", | |||
op_desc->GetName().c_str()); | |||
GE_CHK_RT_RET(rtFunctionRegister(bin_handle, bin_file_key, bin_file_key, kernel_name.c_str(), 0)); | |||
void *addr; | |||
uint32_t prefetch_cnt; | |||
GE_CHK_RT_RET(rtGetAddrAndPrefCntWithHandle(bin_handle, kernel_name.c_str(), &addr, &prefetch_cnt)); | |||
addr_and_pref_cnt_[kernel_name] = { addr, prefetch_cnt }; | |||
used_tbe_handle_map_[bin_file_key] = 1; // Init used num to 1. | |||
return SUCCESS; | |||
} | |||
@@ -3817,6 +3823,18 @@ Status DavinciModel::FunctionRegister(const OpDescPtr &op_desc, string &bin_file | |||
return SUCCESS; | |||
} | |||
// Looks up the (kernel address, prefetch count) pair that FunctionRegister
// cached in addr_and_pref_cnt_ for the given kernel name.
// Returns INTERNAL_ERROR when the kernel was never registered.
Status DavinciModel::GetAddrAndPrefCnt(const std::string &kernel_name, void *&addr, uint32_t &pref_cnt) {
  const auto found = addr_and_pref_cnt_.find(kernel_name);
  if (found != addr_and_pref_cnt_.end()) {
    addr = found->second.first;
    pref_cnt = found->second.second;
    return SUCCESS;
  }
  REPORT_INNER_ERROR("E19999", "Get addr and pref cnt failed, kernel_name:%s", kernel_name.c_str());
  GELOGE(INTERNAL_ERROR, "[Check][Param] Get addr and pref cnt failed, kernel_name:%s", kernel_name.c_str());
  return INTERNAL_ERROR;
}
Status DavinciModel::InitBinaryMagic(const OpDescPtr &op_desc, bool is_ffts, size_t thread_index, | |||
rtDevBinary_t &binary) { | |||
string json_string; | |||
@@ -3830,7 +3848,7 @@ Status DavinciModel::InitBinaryMagic(const OpDescPtr &op_desc, bool is_ffts, siz | |||
if (is_ffts) { | |||
vector<string> json_list; | |||
(void)AttrUtils::GetListStr(op_desc, tvm_magic, json_list); | |||
if (json_list.size() != kFftsTbeHandleElementSize) { | |||
if (json_list.size() <= thread_index) { | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] failed. Attr is %s, thread index is %zu, json list size is %zu.", | |||
tvm_magic.c_str(), thread_index, json_list.size()); | |||
return INTERNAL_ERROR; | |||
@@ -3859,7 +3877,7 @@ Status DavinciModel::InitMetaData(const OpDescPtr &op_desc, bool is_ffts, size_t | |||
if (is_ffts) { | |||
vector<string> meta_data_list; | |||
(void)AttrUtils::GetListStr(op_desc, tvm_metadata, meta_data_list); | |||
if (meta_data_list.size() != kFftsTbeHandleElementSize) { | |||
if (meta_data_list.size() <= thread_index) { | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] failed, attr is %s, thread index is %zu, meta data list size is %zu.", | |||
tvm_metadata.c_str(), thread_index, meta_data_list.size()); | |||
return INTERNAL_ERROR; | |||
@@ -3886,7 +3904,7 @@ Status DavinciModel::InitKernelName(const OpDescPtr &op_desc, bool is_ffts, size | |||
} | |||
string attr_kernel_name = op_desc->GetName().substr(pos + 1) + "_thread_kernelname"; | |||
(void)AttrUtils::GetListStr(op_desc, attr_kernel_name, kernel_name_list); | |||
if (kernel_name_list.size() != kFftsTbeHandleElementSize) { | |||
if (kernel_name_list.size() <= thread_index) { | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] failed, attr is %s, thread index is %zu, kernel name list size is %zu.", | |||
attr_kernel_name.c_str(), thread_index, kernel_name_list.size()); | |||
return INTERNAL_ERROR; | |||
@@ -4648,4 +4666,50 @@ Status DavinciModel::GetTotalMemSizeExcludeZeroCopy(int64_t &total_useful_size) | |||
total_useful_size = runtime_param_.mem_size - runtime_param_.zero_copy_size; | |||
return SUCCESS; | |||
} | |||
// Returns the runtime event id used by a blocking aicpu op on |stream|.
// The first call for a stream creates an event and caches it in
// stream_2_event_ (cached events are destroyed in ~DavinciModel);
// later calls reuse the cached event.
// Fix: if rtGetEventID fails right after rtEventCreateWithFlag succeeded,
// the freshly created event is now destroyed instead of being leaked
// (it was neither cached nor released on that error path).
Status DavinciModel::GetEventIdForBlockingAicpuOp(const OpDescPtr &op_desc, rtStream_t stream, uint32_t &event_id) {
  GELOGI("Get event id for aicpu blocking op:%s", op_desc->GetName().c_str());
  auto it = stream_2_event_.find(stream);
  if (it != stream_2_event_.end()) {
    auto rt_ret = rtGetEventID(it->second, &event_id);
    if (rt_ret != RT_ERROR_NONE) {
      REPORT_CALL_ERROR("E19999", "Call rtGetEventID failed for op:%s(%s), ret:0x%X",
                        op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
      GELOGE(RT_FAILED, "[Call][rtGetEventID] failed for op:%s(%s), ret:0x%X",
             op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
  } else {
    rtEvent_t rt_event = nullptr;
    auto rt_ret = rtEventCreateWithFlag(&rt_event, RT_EVENT_WITH_FLAG);
    if (rt_ret != RT_ERROR_NONE) {
      REPORT_CALL_ERROR("E19999", "Call rtEventCreateWithFlag failed for op:%s(%s), ret:0x%X",
                        op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
      GELOGE(RT_FAILED, "[Call][rtEventCreateWithFlag] failed for op:%s(%s), ret:0x%X",
             op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    rt_ret = rtGetEventID(rt_event, &event_id);
    if (rt_ret != RT_ERROR_NONE) {
      REPORT_CALL_ERROR("E19999", "Call rtGetEventID failed for op:%s(%s), ret:0x%X",
                        op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
      GELOGE(RT_FAILED, "[Call][rtGetEventID] failed for op:%s(%s), ret:0x%X",
             op_desc->GetName().c_str(), op_desc->GetType().c_str(), rt_ret);
      // Do not leak the event we just created: it is not cached yet, so nobody
      // else can release it.
      (void)rtEventDestroy(rt_event);
      return RT_ERROR_TO_GE_STATUS(rt_ret);
    }
    stream_2_event_.emplace(stream, rt_event);
  }
  return SUCCESS;
}
// Fetches the event cached for |stream| in stream_2_event_ (populated by
// GetEventIdForBlockingAicpuOp). Fails when no event exists for the stream.
Status DavinciModel::GetEventByStream(const rtStream_t &stream, rtEvent_t &rt_event) {
  const auto iter = stream_2_event_.find(stream);
  if (iter != stream_2_event_.end()) {
    rt_event = iter->second;
    return SUCCESS;
  }
  REPORT_INNER_ERROR("E19999", "Get event failed");
  GELOGE(FAILED, "[Get][Event] Get event failed");
  return FAILED;
}
} // namespace ge |
@@ -582,6 +582,12 @@ class DavinciModel { | |||
void SetRunningFlag(bool flag) { running_flg_ = flag; } | |||
Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback); | |||
// for blocking aicpu op | |||
Status GetEventByStream(const rtStream_t &stream, rtEvent_t &rt_event); | |||
Status GetEventIdForBlockingAicpuOp(const OpDescPtr &op_desc, rtStream_t stream, uint32_t &event_id); | |||
Status GetAddrAndPrefCnt(const std::string &kernel_name, void *&addr, uint32_t &pref_cnt); | |||
private: | |||
// memory address of weights | |||
uint8_t *weights_mem_base_; | |||
@@ -1021,6 +1027,8 @@ class DavinciModel { | |||
map<string, uint32_t> used_tbe_handle_map_; | |||
std::map<std::string, std::pair<void *, uint32_t>> addr_and_pref_cnt_; | |||
// for profiling task and graph info | |||
vector<TaskDescInfo> task_desc_info_; | |||
@@ -1107,6 +1115,8 @@ class DavinciModel { | |||
// op name to attrs mapping | |||
std::map<std::string, std::map<std::string, std::vector<std::string>>> op_name_to_attrs_; | |||
std::map<rtStream_t, rtEvent_t> stream_2_event_; | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ |
@@ -0,0 +1,977 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "graph/load/model_manager/task_info/ffts_plus_task_info.h" | |||
#include "graph/load/model_manager/davinci_model.h" | |||
// File-local layout constants for FFTS+ task/ctx initialization.
namespace {
constexpr uint32_t kAddrLen = sizeof(void *);  // bytes per device address entry in args_
constexpr uint32_t kSrcSlotNum = 4;            // expected src_slot entries per AIC/AIV ctx
constexpr uint32_t kWriteValueNum = 4;         // presumably write-value entries per ctx — used later in this file; confirm
constexpr uint32_t kUserDataNum = 9;           // presumably user-data words per aicpu ctx — used later in this file; confirm
constexpr uint32_t kNonTailIndex = 0;          // kernel_name index of the non-tail-block kernel
constexpr uint32_t kTailIndex = 1;             // kernel_name index of the tail-block kernel
constexpr uint32_t kAicAivCtxPcNum = 2;        // expected kernel_name entries for an AIC/AIV ctx
constexpr uint32_t kNonTailAicCtxIndex = 0;    // mix-AIC/AIV kernel_name slot: non-tail AIC
constexpr uint32_t kTailAicCtxIndex = 1;       // mix-AIC/AIV kernel_name slot: tail AIC
constexpr uint32_t kNonTailAivCtxIndex = 2;    // mix-AIC/AIV kernel_name slot: non-tail AIV
constexpr uint32_t kTailAivCtxIndex = 3;       // mix-AIC/AIV kernel_name slot: tail AIV
constexpr uint32_t kMixAicAivCtxPcNum = 4;     // expected kernel_name entries for a MIX AIC/AIV ctx
}  // namespace
namespace ge { | |||
// Releases the device-side args buffer allocated with rtMalloc in Init().
FftsPlusTaskInfo::~FftsPlusTaskInfo() {
  GE_FREE_RT_LOG(args_);
}
// Builds the runtime FFTS+ task descriptor from the protobuf task def:
// allocates the device args buffer, fills the SQE and all ctx entries, and
// copies the collected io addresses to the device.
// NOTE(review): sqe_buffer and ctx_buffer are function-local vectors, yet
// ffts_plus_task_info_.fftsPlusSqe and .descBuf are pointers into them — those
// pointers dangle as soon as Init() returns. Confirm ffts_plus_task_info_ is
// fully consumed (e.g. deep-copied by the runtime) before return; otherwise
// the buffers must be lifted into members.
Status FftsPlusTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
  GELOGI("Init Start");
  GE_CHECK_NOTNULL(davinci_model);
  davinci_model_ = davinci_model;
  GE_CHK_STATUS_RET_NOLOG(SetStream(task_def.stream_id(), davinci_model_->GetStreamList()));
  const domi::FftsPlusTaskDef &ffts_plus_task_def = task_def.ffts_plus_task();
  OpDescPtr op_desc = davinci_model_->GetOpByIndex(ffts_plus_task_def.op_index());
  GE_CHECK_NOTNULL(op_desc);
  // One device pointer slot per address entry declared by the task def.
  args_size_ = kAddrLen * ffts_plus_task_def.addr_size();
  if (args_size_ != 0) {
    GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM));
  }
  std::vector<uint8_t> sqe_buffer(sizeof(rtFftsPlusSqe_t));
  auto ffts_plus_sqe = reinterpret_cast<rtFftsPlusSqe_t *>(sqe_buffer.data());
  InitFftsPlusSqe(ffts_plus_task_def.ffts_plus_sqe(), ffts_plus_sqe);
  ffts_plus_task_info_.fftsPlusSqe = ffts_plus_sqe;
  // Each ctx entry occupies one rtFftsPlusComCtx_t slot; specific ctx types alias it.
  size_t ctx_num = ffts_plus_task_def.ffts_plus_ctx_size();
  ffts_plus_task_info_.descBufLen = sizeof(rtFftsPlusComCtx_t) * ctx_num;
  std::vector<uint8_t> ctx_buffer(ffts_plus_task_info_.descBufLen);
  auto ctx = reinterpret_cast<void *>(ctx_buffer.data());
  GE_CHK_STATUS_RET_NOLOG(InitFftsPlusCtx(ffts_plus_task_def, ctx_num, ctx));
  ffts_plus_task_info_.descBuf = reinterpret_cast<void *>(ctx_buffer.data());
  if (args_ != nullptr) {
    // io_addrs_ was populated by the ctx initializers above; push it to device.
    size_t data_size = kAddrLen * io_addrs_.size();
    GE_CHK_RT_RET(rtMemcpy(args_, args_size_, io_addrs_.data(), data_size, RT_MEMCPY_HOST_TO_DEVICE));
  }
  GELOGI("Init Success. Node: %s, input/output size: %zu", op_desc->GetName().c_str(), io_addrs_.size());
  return SUCCESS;
}
// Fills the FFTS+ SQE from its protobuf definition. Each masked field is
// truncated to the bit width the hardware SQE layout reserves for it.
void FftsPlusTaskInfo::InitFftsPlusSqe(const domi::FftsPlusSqeDef &sqe_def, rtFftsPlusSqe_t *&sqe) {
  InitFftsPlusSqeHeader(sqe_def.sqe_header(), sqe->sqeHeader);
  // Keep only the bits selected by |mask| and narrow to the SQE field width.
  const auto masked = [](auto value, uint32_t mask) { return static_cast<uint16_t>(value & mask); };
  sqe->pmg = masked(sqe_def.pmg(), 0X00000003);                        // 2 bits
  sqe->ns = masked(sqe_def.ns(), 0X00000001);                          // 1 bit
  sqe->partId = masked(sqe_def.part_id(), 0X000000FF);                 // 8 bits
  sqe->qos = masked(sqe_def.qos(), 0X0000000F);                        // 4 bits
  sqe->totalContextNum = static_cast<uint16_t>(sqe_def.total_context_num());
  sqe->readyContextNum = static_cast<uint16_t>(sqe_def.ready_context_num());
  sqe->preloadContextNum = static_cast<uint16_t>(sqe_def.preload_context_num());
  sqe->dsplitUnit = masked(sqe_def.dsplit_unit(), 0X00000007);         // 3 bits
  sqe->prefetchOstNum = masked(sqe_def.prefetch_ost_num(), 0X0000001F);    // 5 bits
  sqe->cmaintOstNum = masked(sqe_def.cmaint_ost_num(), 0X0000001F);        // 5 bits
  sqe->aicPrefetchLower = masked(sqe_def.aic_prefetch_lower(), 0X0000001F);  // 5 bits
  sqe->aicPrefetchUpper = masked(sqe_def.aic_prefetch_upper(), 0X0000001F);  // 5 bits
  sqe->aivPrefetchLower = masked(sqe_def.aiv_prefetch_lower(), 0X0000001F);  // 5 bits
  sqe->aivPrefetchUpper = masked(sqe_def.aiv_prefetch_upper(), 0X0000001F);  // 5 bits
}
// Copies the STARS SQE header fields, narrowing each to the width of the
// runtime header struct.
void FftsPlusTaskInfo::InitFftsPlusSqeHeader(const domi::StarsSqeHeaderDef &sqe_header_def,
                                             rtStarsSqeHeader_t &sqe_header) {
  sqe_header.blockDim = static_cast<uint16_t>(sqe_header_def.block_dim());
  sqe_header.l1Lock = static_cast<uint8_t>(sqe_header_def.l1_lock());
  sqe_header.l1Unlock = static_cast<uint8_t>(sqe_header_def.l1_unlock());
}
// Walks every ctx entry of the task def and dispatches it to the matching
// software-type initializer; any other type is handled as a hardware ctx.
Status FftsPlusTaskInfo::InitFftsPlusCtx(const domi::FftsPlusTaskDef &task_def, size_t ctx_num, void *&ctx) {
  const uintptr_t ctx_base = reinterpret_cast<uintptr_t>(ctx);
  for (size_t idx = 0; idx < ctx_num; idx++) {
    const domi::FftsPlusCtxDef &ctx_def = task_def.ffts_plus_ctx(idx);
    GELOGI("Init ctx %zu in FftsPlusTask, software_ctx_type=%u, hardware_ctx_type=%u", idx, ctx_def.software_ctx_type(),
           ctx_def.hardware_ctx_type());
    // Every slot has the size of the common ctx struct; typed ctxs alias it.
    uintptr_t cur_ctx = ctx_base + sizeof(rtFftsPlusComCtx_t) * idx;
    const auto soft_type = ctx_def.software_ctx_type();
    if (soft_type == RT_SOFT_CTX_TYPE_AT_START) {
      auto typed_ctx = reinterpret_cast<rtFftsPlusAtStartCtx_t *>(cur_ctx);
      GE_CHK_STATUS_RET_NOLOG(InitAtStartCtx(ctx_def.at_start_ctx(), typed_ctx));
    } else if (soft_type == RT_SOFT_CTX_TYPE_AT_END) {
      auto typed_ctx = reinterpret_cast<rtFftsPlusAtEndCtx_t *>(cur_ctx);
      GE_CHK_STATUS_RET_NOLOG(InitAtEndCtx(ctx_def.at_end_ctx(), typed_ctx));
    } else if (soft_type == RT_SOFT_CTX_TYPE_LABEL) {
      auto typed_ctx = reinterpret_cast<rtFftsPlusLabelCtx_t *>(cur_ctx);
      GE_CHK_STATUS_RET_NOLOG(InitLabelCtx(ctx_def.label_ctx(), typed_ctx));
    } else {
      GE_CHK_STATUS_RET_NOLOG(InitHardWareCtx(ctx_def, cur_ctx));
    }
  }
  return SUCCESS;
}
// Initializes an AT_START software ctx from its protobuf definition. Fails
// unless the successor list carries exactly RT_CTX_SUCCESSOR_NUM entries.
Status FftsPlusTaskInfo::InitAtStartCtx(const domi::FftsPlusAtStartCtxDef &ctx_def, rtFftsPlusAtStartCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // lowest bit only
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  const int succ_size = ctx_def.successor_list_size();
  if (succ_size != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusAtStartCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, succ_size);
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusAtStartCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, succ_size);
    return FAILED;
  }
  for (size_t idx = 0; idx < RT_CTX_SUCCESSOR_NUM; idx++) {
    ctx->successorList[idx] = static_cast<uint16_t>(ctx_def.successor_list(idx));
  }
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  ctx->threadIdInit = static_cast<uint16_t>(ctx_def.thread_id_init());
  ctx->threadWindowSize = static_cast<uint16_t>(ctx_def.thread_window_size());
  return SUCCESS;
}
// Initializes an AT_END software ctx from its protobuf definition. Both
// fixed-size successor slot arrays are length-checked before copying.
// Fix: the two GELOGE messages named "FftsPlusAtStartCtxDef" although this
// function validates FftsPlusAtEndCtxDef (the paired REPORT_INNER_ERROR
// messages already said AtEnd) — corrected the copy-pasted log text.
Status FftsPlusTaskInfo::InitAtEndCtx(const domi::FftsPlusAtEndCtxDef &ctx_def, rtFftsPlusAtEndCtx_t *&ctx) {
  ctx->atStartSlotNumber = static_cast<uint8_t>(ctx_def.at_start_slot_num());
  ctx->outLabelSlotNumber = static_cast<uint8_t>(ctx_def.out_label_slot_num() & 0X0000007F);  // 7 bits, 0111,1111
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // 1 bit, 0000,0001
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  if (ctx_def.succ_at_start_slot_size() != RT_CTX_SUCC_AT_START_SLOT_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of succ_at_start_slot in FftsPlusAtEndCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCC_AT_START_SLOT_NUM, ctx_def.succ_at_start_slot_size());
    GELOGE(FAILED, "[Check][Param] Size of succ_at_start_slot in FftsPlusAtEndCtxDef should be %d, but %d exactly",
           RT_CTX_SUCC_AT_START_SLOT_NUM, ctx_def.succ_at_start_slot_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_SUCC_AT_START_SLOT_NUM; i++) {
    ctx->succAtStartSlot[i] = static_cast<uint16_t>(ctx_def.succ_at_start_slot(i));
  }
  if (ctx_def.succ_out_label_slot_size() != RT_CTX_SUCC_OUT_LABEL_SLOT_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of succ_out_label_slot in FftsPlusAtEndCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCC_OUT_LABEL_SLOT_NUM, ctx_def.succ_out_label_slot_size());
    GELOGE(FAILED, "[Check][Param] Size of succ_out_label_slot in FftsPlusAtEndCtxDef should be %d, but %d exactly",
           RT_CTX_SUCC_OUT_LABEL_SLOT_NUM, ctx_def.succ_out_label_slot_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_SUCC_OUT_LABEL_SLOT_NUM; i++) {
    ctx->succOutLabelSlot[i] = static_cast<uint16_t>(ctx_def.succ_out_label_slot(i));
  }
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  return SUCCESS;
}
// Initializes a LABEL software ctx; the fixed-size successor list is
// length-checked before it is copied into the runtime struct.
Status FftsPlusTaskInfo::InitLabelCtx(const domi::FftsPlusLabelCtxDef &ctx_def, rtFftsPlusLabelCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  const int succ_size = ctx_def.successor_list_size();
  if (succ_size != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusLabelCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, succ_size);
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusLabelCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, succ_size);
    return FAILED;
  }
  for (size_t idx = 0; idx < RT_CTX_SUCCESSOR_NUM; idx++) {
    ctx->successorList[idx] = static_cast<uint16_t>(ctx_def.successor_list(idx));
  }
  return SUCCESS;
}
// Dispatches a hardware ctx entry to its type-specific initializer. |ctx| is
// the address of this entry's slot inside the common ctx buffer.
Status FftsPlusTaskInfo::InitHardWareCtx(const domi::FftsPlusCtxDef &ctx_def, uintptr_t &ctx) {
  const auto hw_type = ctx_def.hardware_ctx_type();
  switch (hw_type) {
    case RT_HW_CTX_TYPE_AIC:
    case RT_HW_CTX_TYPE_AIV: {  // plain AI core / AI vector kernel ctx
      auto typed_ctx = reinterpret_cast<rtFftsPlusAicAivCtx_t *>(ctx);
      GE_CHK_STATUS_RET_NOLOG(InitAicAivCtx(ctx_def.aic_aiv_ctx(), ctx_def.op_index(), typed_ctx));
      break;
    }
    case RT_HW_CTX_TYPE_NOTIFY_WAIT:
    case RT_HW_CTX_TYPE_NOTIFY_RECORD: {  // notify wait/record share one layout
      auto typed_ctx = reinterpret_cast<rtFftsPlusNotifyCtx_t *>(ctx);
      GE_CHK_STATUS_RET_NOLOG(InitNotifyCtx(ctx_def.notify_ctx(), typed_ctx));
      break;
    }
    case RT_HW_CTX_TYPE_WRITE_VALUE: {
      auto typed_ctx = reinterpret_cast<rtFftsPlusWriteValueCtx_t *>(ctx);
      GE_CHK_STATUS_RET_NOLOG(InitWriteValueCtx(ctx_def.write_value_ctx(), typed_ctx));
      break;
    }
    case RT_HW_CTX_TYPE_MIX_AIC:
    case RT_HW_CTX_TYPE_MIX_AIV: {  // mixed AIC/AIV kernel ctx
      auto typed_ctx = reinterpret_cast<rtFftsPlusMixAicAivCtx_t *>(ctx);
      GE_CHK_STATUS_RET_NOLOG(InitMixAicAivCtx(ctx_def.mix_aic_aiv_ctx(), ctx_def.op_index(), typed_ctx));
      break;
    }
    case RT_HW_CTX_TYPE_SDMA: {
      auto typed_ctx = reinterpret_cast<rtFftsPlusSdmaCtx_t *>(ctx);
      GE_CHK_STATUS_RET_NOLOG(InitSdmaCtx(ctx_def.sdma_ctx(), typed_ctx));
      break;
    }
    case RT_HW_CTX_TYPE_FLUSH_DATA:
    case RT_HW_CTX_TYPE_INVALIDATE_DATA:
    case RT_HW_CTX_TYPE_WRITEBACK_DATA: {  // all cache-maintenance types share the data ctx layout
      auto typed_ctx = reinterpret_cast<rtFftsPlusDataCtx_t *>(ctx);
      GE_CHK_STATUS_RET_NOLOG(InitDataCtx(ctx_def.data_ctx(), typed_ctx));
      break;
    }
    case RT_HW_CTX_TYPE_AICPU: {
      auto typed_ctx = reinterpret_cast<rtFftsPlusAiCpuCtx_t *>(ctx);
      GE_CHK_STATUS_RET_NOLOG(InitAicpuCtx(ctx_def.aicpu_ctx(), typed_ctx));
      break;
    }
    case RT_HW_CTX_TYPE_LOAD: {
      GE_CHK_STATUS_RET_NOLOG(InitLoadCtx(ctx_def, ctx));
      break;
    }
    default:
      REPORT_INNER_ERROR("E19999", "Unsupported hardware ctx type %u", ctx_def.hardware_ctx_type());
      GELOGE(FAILED, "[Check][CtxType] Unsupported hardware ctx type %u", ctx_def.hardware_ctx_type());
      return FAILED;
  }
  return SUCCESS;
}
// Initializes an AIC/AIV hardware ctx from its protobuf definition. In
// auto-threading mode (atm == 1) it also appends the per-thread I/O addresses
// to io_addrs_, which Init() later copies into the device args_ buffer.
// |op_index| is accepted for signature uniformity but not referenced here.
Status FftsPlusTaskInfo::InitAicAivCtx(const domi::FftsPlusAicAivCtxDef &ctx_def, uint32_t op_index,
                                       rtFftsPlusAicAivCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // 1 bit, 0000,0001
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  if (ctx_def.successor_list_size() != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusAicAivCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusAicAivCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_SUCCESSOR_NUM; i++) {
    ctx->successorList[i] = static_cast<uint16_t>(ctx_def.successor_list(i));
  }
  ctx->stat = static_cast<uint16_t>(ctx_def.stat() & 0X00000001);    // 1 bit , 0000,0001
  ctx->schem = static_cast<uint16_t>(ctx_def.schem() & 0X00000003);  // 2 bits, 0000,0011
  ctx->atm = static_cast<uint16_t>(ctx_def.atm() & 0X00000001);      // 1 bit , 0000,0001
  // NOTE(review): both prefetch bitmaps below are derived from ctx_def.atm();
  // this looks like a copy-paste slip — expected ctx_def.prefetch_enable_bitmap()
  // and ctx_def.prefetch_once_bitmap(). Confirm against the proto definition.
  ctx->prefetchEnableBitmap = static_cast<uint16_t>(ctx_def.atm() & 0X0000000F);  // 4 bits, 0000,1111
  ctx->prefetchOnceBitmap = static_cast<uint16_t>(ctx_def.atm() & 0X0000000F);    // 4 bits, 0000,1111
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  ctx->nonTailBlockdim = static_cast<uint16_t>(ctx_def.non_tail_block_dim());
  ctx->tailBlockdim = static_cast<uint16_t>(ctx_def.tail_block_dim());
  uint64_t task_param_ptr_base;
  if (ctx->atm == 0) {
    // TODO, manual: manual-threading path; task params start at the args_ base.
    task_param_ptr_base = reinterpret_cast<uintptr_t>(args_);
  } else {
    // auto-threading: per-thread task params live after the io address table in args_.
    task_param_ptr_base = reinterpret_cast<uintptr_t>(args_) + kAddrLen * io_addrs_.size();
    GELOGD("FftsPlusAicAivCtxDef: task param addr is %lu.", task_param_ptr_base);
    const auto &rts_param = davinci_model_->GetRuntimeParam();
    // NOTE(review): assumes threadDim >= 1 — threadDim == 0 underflows the loop
    // bound below (uint16 minus one, cast to uint32). Confirm upstream guarantees.
    for (uint32_t i = 0; i < static_cast<uint32_t>(ctx->threadDim - 1); i++) {
      GE_CHK_STATUS_RET_NOLOG(InitIoAddrs(rts_param, ctx_def, i,
                                          static_cast<uint32_t>(ctx_def.task_addr_offset_size())));
    }
    // The last (tail) thread uses input_output_count() addresses instead.
    GE_CHK_STATUS_RET_NOLOG(InitIoAddrs(rts_param, ctx_def, static_cast<uint32_t>(ctx->threadDim - 1),
                                        ctx_def.input_output_count()));
    // task_addr entries beyond the offset table are the tail thread's extra
    // (workspace) logical addresses; translate each to a device address.
    int last_thread_workspace_size = ctx_def.task_addr_size() - ctx_def.task_addr_offset_size();
    for (int k = 0; k < last_thread_workspace_size; ++k) {
      uintptr_t logic_addr = ctx_def.task_addr(ctx_def.task_addr_offset_size() + k);
      uint8_t *io_addr = nullptr;
      GE_CHK_STATUS_RET_NOLOG(ModelUtils::GetRtAddress(rts_param, logic_addr, io_addr));
      io_addrs_.emplace_back(io_addr);
    }
  }
  // Split the 48-bit device pointer into the low/high register fields.
  ctx->taskParamPtrBaseL = static_cast<uint32_t>(task_param_ptr_base & 0XFFFFFFFF);          // low 32 bits
  ctx->taskParamPtrBaseH = static_cast<uint16_t>((task_param_ptr_base >> 32) & 0X0000FFFF);  // high 16 bits
  ctx->taskParamPtrOffset = static_cast<uint16_t>(ctx_def.task_param_ptr_offset());
  // PcL for low 32 bits of pc, PcH for high 16 bits of pc
  if (ctx_def.kernel_name_size() != kAicAivCtxPcNum) {
    REPORT_INNER_ERROR("E19999", "Size of kernel_name in FftsPlusAicAivCtxDef should be %d, but %d exactly",
                       kAicAivCtxPcNum, ctx_def.kernel_name_size());
    GELOGE(FAILED, "[Check][Param] Size of kernel_name in FftsPlusAicAivCtxDef should be %d, but %d exactly",
           kAicAivCtxPcNum, ctx_def.kernel_name_size());
    return FAILED;
  }
  // Resolve the registered kernel addresses (cached by FunctionRegister).
  uint32_t i_cache_prefetch_cnt_1;
  void *non_tail_task_start_pc = nullptr;
  GE_CHK_STATUS_RET_NOLOG(davinci_model_->GetAddrAndPrefCnt(ctx_def.kernel_name(kNonTailIndex), non_tail_task_start_pc,
                                                            i_cache_prefetch_cnt_1));
  ctx->nonTailTaskStartPcL = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(non_tail_task_start_pc) & 0XFFFFFFFF);
  ctx->nonTailTaskStartPcH = static_cast<uint16_t>((reinterpret_cast<uintptr_t>(non_tail_task_start_pc) >> 32) &
                                                   0X0000FFFF);
  uint32_t i_cache_prefetch_cnt_2;
  void *tail_task_start_pc = nullptr;
  GE_CHK_STATUS_RET_NOLOG(davinci_model_->GetAddrAndPrefCnt(ctx_def.kernel_name(kTailIndex), tail_task_start_pc,
                                                            i_cache_prefetch_cnt_2));
  ctx->tailTaskStartPcL = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(tail_task_start_pc) & 0XFFFFFFFF);
  ctx->tailTaskStartPcH = static_cast<uint16_t>((reinterpret_cast<uintptr_t>(tail_task_start_pc) >> 32) & 0X0000FFFF);
  // Take the smaller of the two kernels' prefetch counts (conservative choice).
  uint32_t i_cache_prefetch_cnt = std::min(i_cache_prefetch_cnt_1, i_cache_prefetch_cnt_2);
  ctx->icachePrefetchCnt = static_cast<uint16_t>(i_cache_prefetch_cnt & 0X0000001F);  // 5 bits, 0001,1111
  if (ctx_def.src_slot_size() != kSrcSlotNum) {
    REPORT_INNER_ERROR("E19999", "Size of src_slot in FftsPlusAicAivCtxDef should be %d, but %d exactly",
                       kSrcSlotNum, ctx_def.src_slot_size());
    GELOGE(FAILED, "[Check][Param] Size of src_slot in FftsPlusAicAivCtxDef should be %d, but %d exactly",
           kSrcSlotNum, ctx_def.src_slot_size());
    return FAILED;
  }
  for (size_t i = 0; i < kSrcSlotNum; i++) {
    ctx->srcSlot[i] = static_cast<uint16_t>(ctx_def.src_slot(i));
  }
  return SUCCESS;
}
// Fill a runtime notify context from its protobuf definition.
// Masks keep only the bit-width the hardware context reserves for each field.
Status FftsPlusTaskInfo::InitNotifyCtx(const domi::FftsPlusNotifyCtxDef &ctx_def, rtFftsPlusNotifyCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // single-bit field
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  // The successor list is fixed-size in the hardware context; reject any other length.
  if (ctx_def.successor_list_size() != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusNotifyCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusNotifyCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    return FAILED;
  }
  for (int idx = 0; idx < RT_CTX_SUCCESSOR_NUM; ++idx) {
    ctx->successorList[idx] = static_cast<uint16_t>(ctx_def.successor_list(idx));
  }
  ctx->atm = static_cast<uint16_t>(ctx_def.atm() & 0X00000001);  // single-bit field
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  ctx->notifyIdBase = static_cast<uint16_t>(ctx_def.notify_id_base());
  return SUCCESS;
}
// Fill a runtime write-value context from its protobuf definition.
// The write base address is resolved from a logic address and split into
// low/high parts as the hardware context expects.
Status FftsPlusTaskInfo::InitWriteValueCtx(const domi::FftsPlusWriteValueCtxDef &ctx_def,
                                           rtFftsPlusWriteValueCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // single-bit field
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  // The successor list is fixed-size in the hardware context; reject any other length.
  if (ctx_def.successor_list_size() != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusWriteValueCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusWriteValueCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    return FAILED;
  }
  for (int idx = 0; idx < RT_CTX_SUCCESSOR_NUM; ++idx) {
    ctx->successorList[idx] = static_cast<uint16_t>(ctx_def.successor_list(idx));
  }
  ctx->atm = static_cast<uint16_t>(ctx_def.atm() & 0X00000001);        // 1 bit
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  ctx->awSize = static_cast<uint8_t>(ctx_def.aw_size() & 0X00000007);  // 3 bits
  ctx->snoop = static_cast<uint8_t>(ctx_def.snoop() & 0X00000001);     // 1 bit
  ctx->awCache = static_cast<uint8_t>(ctx_def.aw_cache() & 0X0000000F);  // 4 bits
  ctx->awProt = static_cast<uint8_t>(ctx_def.aw_prot() & 0X00000007);    // 3 bits
  ctx->va = static_cast<uint8_t>(ctx_def.va() & 0X00000001);             // 1 bit
  const auto &rts_param = davinci_model_->GetRuntimeParam();
  uint8_t *write_base = nullptr;
  if (ModelUtils::GetRtAddress(rts_param, ctx_def.write_addr_base(), write_base) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed.");
    return INTERNAL_ERROR;
  }
  const uintptr_t write_base_val = reinterpret_cast<uintptr_t>(write_base);
  ctx->writeAddressBaseL = static_cast<uint32_t>(write_base_val & 0XFFFFFFFF);          // low 32 bits
  ctx->writeAddressBaseH = static_cast<uint32_t>((write_base_val >> 32) & 0X0001FFFF);  // high 17 bits
  ctx->writeAddressOffset = ctx_def.write_addr_offset();
  // The write-value array is fixed-size in the hardware context; reject any other length.
  if (ctx_def.write_value_size() != kWriteValueNum) {
    REPORT_INNER_ERROR("E19999", "Size of write_value in FftsPlusWriteValueCtxDef should be %d, but %d exactly",
                       kWriteValueNum, ctx_def.write_value_size());
    GELOGE(FAILED, "[Check][Param] Size of write_value in FftsPlusWriteValueCtxDef should be %d, but %d exactly",
           kWriteValueNum, ctx_def.write_value_size());
    return FAILED;
  }
  for (size_t idx = 0; idx < kWriteValueNum; ++idx) {
    ctx->writeValue[idx] = static_cast<uint16_t>(ctx_def.write_value(idx));
  }
  return SUCCESS;
}
// Fill a runtime mixed AIC/AIV context from its protobuf definition.
// Resolves per-thread IO addresses (auto-threading mode only), packs the AIC/AIV
// task-param pointers, and looks up the four kernel start PCs by name.
// NOTE: op_index is currently unused; kept for interface consistency.
Status FftsPlusTaskInfo::InitMixAicAivCtx(const domi::FftsPlusMixAicAivCtxDef &ctx_def, uint32_t op_index,
                                          rtFftsPlusMixAicAivCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // 1 bit, 0000,0001
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  if (ctx_def.successor_list_size() != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusMixAicAivCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusMixAicAivCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_SUCCESSOR_NUM; i++) {
    ctx->successorList[i] = static_cast<uint16_t>(ctx_def.successor_list(i));
  }
  ctx->stat = static_cast<uint16_t>(ctx_def.stat() & 0X00000001);    // 1 bit , 0000,0001
  ctx->schem = static_cast<uint16_t>(ctx_def.schem() & 0X00000003);  // 2 bits, 0000,0011
  ctx->atm = static_cast<uint16_t>(ctx_def.atm() & 0X00000001);      // 1 bit , 0000,0001
  ctx->prefetchEnableBitmap = static_cast<uint16_t>(ctx_def.prefetch_enable_bitmap() & 0X0000000F);  // 4 bits
  ctx->prefetchOnceBitmap = static_cast<uint16_t>(ctx_def.prefetch_once_bitmap() & 0X0000000F);      // 4 bits
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  ctx->nonTailBlockRatioN = static_cast<uint8_t>(ctx_def.non_tail_block_ratio_n());
  ctx->tailBlockRatioN = static_cast<uint8_t>(ctx_def.tail_block_ratio_n());
  ctx->nonTailBlockdim = static_cast<uint16_t>(ctx_def.non_tail_block_dim());
  ctx->tailBlockdim = static_cast<uint16_t>(ctx_def.tail_block_dim());
  uint64_t task_param_ptr_base;
  if (ctx->atm == 0) {
    // TODO: manual-threading mode — task params start directly at args_.
    task_param_ptr_base = reinterpret_cast<uintptr_t>(args_);
  } else {
    // Auto-threading mode: task params follow the IO-address table in args_.
    task_param_ptr_base = reinterpret_cast<uintptr_t>(args_) + kAddrLen * io_addrs_.size();
    GELOGD("FftsPlusMixAicAivCtxDef: task param addr is %lu.", task_param_ptr_base);
    const auto &rts_param = davinci_model_->GetRuntimeParam();
    // All threads except the last use the full per-thread offset table.
    for (uint32_t i = 0; i < static_cast<uint32_t>(ctx->threadDim - 1); i++) {
      GE_CHK_STATUS_RET_NOLOG(InitIoAddrs(rts_param, ctx_def, i,
                                          static_cast<uint32_t>(ctx_def.task_addr_offset_size())));
    }
    // The last (tail) thread only covers the input/output addresses.
    GE_CHK_STATUS_RET_NOLOG(InitIoAddrs(rts_param, ctx_def, static_cast<uint32_t>(ctx->threadDim - 1),
                                        ctx_def.input_output_count()));
    // Remaining task addrs beyond the offset table are the tail thread's workspaces.
    int last_thread_workspace_size = ctx_def.task_addr_size() - ctx_def.task_addr_offset_size();
    for (int k = 0; k < last_thread_workspace_size; ++k) {
      uintptr_t logic_addr = ctx_def.task_addr(ctx_def.task_addr_offset_size() + k);
      uint8_t *io_addr = nullptr;
      GE_CHK_STATUS_RET_NOLOG(ModelUtils::GetRtAddress(rts_param, logic_addr, io_addr));
      io_addrs_.emplace_back(io_addr);
    }
  }
  ctx->aicTaskParamPtrL = static_cast<uint32_t>(ctx_def.aic_task_param_ptr() & 0XFFFFFFFF);         // low 32 bits
  ctx->aicTaskParamPtrH = static_cast<uint16_t>((ctx_def.aic_task_param_ptr() >> 32) & 0X0000FFFF);  // high 16 bits
  ctx->aicTaskParamPtrOffset = static_cast<uint16_t>(ctx_def.aic_task_param_ptr_offset());
  ctx->aivTaskParamPtrL = static_cast<uint32_t>(ctx_def.aiv_task_param_ptr() & 0XFFFFFFFF);         // low 32 bits
  ctx->aivTaskParamPtrH = static_cast<uint16_t>((ctx_def.aiv_task_param_ptr() >> 32) & 0X0000FFFF);  // high 16 bits
  ctx->aivTaskParamPtrOffset = static_cast<uint16_t>(ctx_def.aiv_task_param_ptr_offset());
  // PcL for low 32 bits of pc, PcH for high 16 bits of pc.
  // Fix: the error-report arguments previously passed kAicAivCtxPcNum while the
  // comparison uses kMixAicAivCtxPcNum, so the log showed the wrong expected size.
  if (ctx_def.kernel_name_size() != kMixAicAivCtxPcNum) {
    REPORT_INNER_ERROR("E19999", "Size of kernel_name in FftsPlusMixAicAivCtxDef should be %d, but %d exactly",
                       kMixAicAivCtxPcNum, ctx_def.kernel_name_size());
    GELOGE(FAILED, "[Check][Param] Size of kernel_name in FftsPlusMixAicAivCtxDef should be %d, but %d exactly",
           kMixAicAivCtxPcNum, ctx_def.kernel_name_size());
    return FAILED;
  }
  uint32_t i_cache_prefetch_cnt_1 = 0U;  // initialized defensively; set by GetAddrAndPrefCnt on success
  void *non_tail_aic_task_start_pc = nullptr;
  GE_CHK_STATUS_RET_NOLOG(davinci_model_->GetAddrAndPrefCnt(ctx_def.kernel_name(kNonTailAicCtxIndex),
                                                            non_tail_aic_task_start_pc, i_cache_prefetch_cnt_1));
  ctx->nonTailAicTaskStartPcL = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(non_tail_aic_task_start_pc) &
                                                      0XFFFFFFFF);
  ctx->nonTailAicTaskStartPcH = static_cast<uint16_t>((reinterpret_cast<uintptr_t>(non_tail_aic_task_start_pc) >> 32) &
                                                      0X0000FFFF);
  uint32_t i_cache_prefetch_cnt_2 = 0U;
  void *tail_aic_task_start_pc = nullptr;
  GE_CHK_STATUS_RET_NOLOG(davinci_model_->GetAddrAndPrefCnt(ctx_def.kernel_name(kTailAicCtxIndex),
                                                            tail_aic_task_start_pc, i_cache_prefetch_cnt_2));
  ctx->tailAicTaskStartPcL = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(tail_aic_task_start_pc) & 0XFFFFFFFF);
  ctx->tailAicTaskStartPcH = static_cast<uint16_t>((reinterpret_cast<uintptr_t>(tail_aic_task_start_pc) >> 32) &
                                                   0X0000FFFF);
  uint32_t aic_i_cache_prefetch_cnt = std::min(i_cache_prefetch_cnt_1, i_cache_prefetch_cnt_2);
  ctx->icachePrefetchCnt = static_cast<uint16_t>(aic_i_cache_prefetch_cnt & 0X0000001F);  // 5 bits, 0001,1111
  uint32_t i_cache_prefetch_cnt_3 = 0U;
  void *non_tail_aiv_task_start_pc = nullptr;
  GE_CHK_STATUS_RET_NOLOG(davinci_model_->GetAddrAndPrefCnt(ctx_def.kernel_name(kNonTailAivCtxIndex),
                                                            non_tail_aiv_task_start_pc, i_cache_prefetch_cnt_3));
  ctx->nonTailAivTaskStartPcL = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(non_tail_aiv_task_start_pc) &
                                                      0XFFFFFFFF);
  ctx->nontailAivTaskStartPcH = static_cast<uint16_t>((reinterpret_cast<uintptr_t>(non_tail_aiv_task_start_pc) >> 32) &
                                                      0X0000FFFF);
  uint32_t i_cache_prefetch_cnt_4 = 0U;
  void *tail_aiv_task_start_pc = nullptr;
  GE_CHK_STATUS_RET_NOLOG(davinci_model_->GetAddrAndPrefCnt(ctx_def.kernel_name(kTailAivCtxIndex),
                                                            tail_aiv_task_start_pc, i_cache_prefetch_cnt_4));
  ctx->tailAivTaskStartPcL = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(tail_aiv_task_start_pc) & 0XFFFFFFFF);
  ctx->tailAivTaskStartPcH = static_cast<uint16_t>((reinterpret_cast<uintptr_t>(tail_aiv_task_start_pc) >> 32) &
                                                   0X0000FFFF);
  uint32_t aiv_i_cache_prefetch_cnt = std::min(i_cache_prefetch_cnt_3, i_cache_prefetch_cnt_4);
  // TODO: icachePrefetchCnt is written twice; the final value is the min over all
  // four kernels (AIC and AIV) — confirm this overwrite is the intended semantics.
  ctx->icachePrefetchCnt = static_cast<uint16_t>(
      std::min(aic_i_cache_prefetch_cnt, aiv_i_cache_prefetch_cnt) & 0X0000001F);  // 5 bits, 0001,1111
  if (ctx_def.src_slot_size() != kSrcSlotNum) {
    REPORT_INNER_ERROR("E19999", "Size of src_slot in FftsPlusMixAicAivCtxDef should be %d, but %d exactly",
                       kSrcSlotNum, ctx_def.src_slot_size());
    GELOGE(FAILED, "[Check][Param] Size of src_slot in FftsPlusMixAicAivCtxDef should be %d, but %d exactly",
           kSrcSlotNum, ctx_def.src_slot_size());
    return FAILED;
  }
  for (size_t i = 0; i < kSrcSlotNum; i++) {
    ctx->srcSlot[i] = static_cast<uint16_t>(ctx_def.src_slot(i));
  }
  return SUCCESS;
}
// Fill a runtime SDMA context from its protobuf definition.
// Source/destination base addresses are resolved from logic addresses and split
// into low/high 32-bit halves as the hardware context expects.
Status FftsPlusTaskInfo::InitSdmaCtx(const domi::FftsPlusSdmaCtxDef &ctx_def, rtFftsPlusSdmaCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // single-bit field
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  // The successor list is fixed-size in the hardware context; reject any other length.
  if (ctx_def.successor_list_size() != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusSdmaCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusSdmaCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    return FAILED;
  }
  for (int idx = 0; idx < RT_CTX_SUCCESSOR_NUM; ++idx) {
    ctx->successorList[idx] = static_cast<uint16_t>(ctx_def.successor_list(idx));
  }
  ctx->sat = static_cast<uint8_t>(ctx_def.sat() & 0X00000001);  // single-bit field
  ctx->atm = static_cast<uint8_t>(ctx_def.atm() & 0X00000001);  // single-bit field
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  ctx->sdmaSqeHeader = ctx_def.sdma_sqe_header();
  ctx->sourceStreamId = static_cast<uint16_t>(ctx_def.src_stream_id());
  ctx->sourceSubstreamId = static_cast<uint16_t>(ctx_def.src_sub_stream_id());
  ctx->destinationStreamId = static_cast<uint16_t>(ctx_def.dst_stream_id());
  ctx->destinationSubstreamId = static_cast<uint16_t>(ctx_def.dst_sub_stream_id());
  const auto &rts_param = davinci_model_->GetRuntimeParam();
  uint8_t *src_base = nullptr;
  if (ModelUtils::GetRtAddress(rts_param, ctx_def.src_addr_base(), src_base) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed.");
    return INTERNAL_ERROR;
  }
  const uintptr_t src_val = reinterpret_cast<uintptr_t>(src_base);
  ctx->sourceAddressBaseL = static_cast<uint32_t>(src_val & 0XFFFFFFFF);  // low 32 bits
  ctx->sourceAddressBaseH = static_cast<uint32_t>(src_val >> 32);         // high 32 bits
  ctx->sourceAddressOffset = ctx_def.src_addr_offset();
  uint8_t *dst_base = nullptr;
  if (ModelUtils::GetRtAddress(rts_param, ctx_def.dst_addr_base(), dst_base) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed.");
    return INTERNAL_ERROR;
  }
  const uintptr_t dst_val = reinterpret_cast<uintptr_t>(dst_base);
  ctx->destinationAddressBaseL = static_cast<uint32_t>(dst_val & 0XFFFFFFFF);  // low 32 bits
  ctx->destinationAddressBaseH = static_cast<uint32_t>(dst_val >> 32);         // high 32 bits
  ctx->destinationAddressOffset = ctx_def.dst_addr_offset();
  ctx->nonTailDataLength = ctx_def.non_tail_data_len();
  ctx->tailDataLength = ctx_def.tail_data_len();
  return SUCCESS;
}
// Fill a runtime data context from its protobuf definition.
// The assignment order mirrors the hardware context layout; masks keep only the
// bit-width each field occupies.
Status FftsPlusTaskInfo::InitDataCtx(const domi::FftsPlusDataCtxDef &ctx_def, rtFftsPlusDataCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // 1 bit, 0000,0001
  ctx->cntInit = static_cast<uint8_t>(ctx_def.cnt_init());
  ctx->cnt = static_cast<uint8_t>(ctx_def.cnt());
  // The successor list is fixed-size in the hardware context; reject any other length.
  if (ctx_def.successor_list_size() != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusDataCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusDataCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_SUCCESSOR_NUM; i++) {
    ctx->successorList[i] = static_cast<uint16_t>(ctx_def.successor_list(i));
  }
  ctx->atm = static_cast<uint8_t>(ctx_def.atm() & 0X00000001);  // 1 bit, 0000,0001
  ctx->origConsumerCounter = static_cast<uint16_t>(ctx_def.orig_consumer_counter());
  ctx->runConsumerCounter = static_cast<uint16_t>(ctx_def.run_consumer_counter());
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  // Resolve the logic base address to a runtime address, then split into halves.
  const auto &rts_param = davinci_model_->GetRuntimeParam();
  uint8_t *addr_base = nullptr;
  if (ModelUtils::GetRtAddress(rts_param, ctx_def.addr_base(), addr_base) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed.");
    return INTERNAL_ERROR;
  }
  ctx->addressBaseL = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(addr_base) & 0XFFFFFFFF);  // low 32 bits
  ctx->addressBaseH = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(addr_base) >> 32);  // high 32 bits
  ctx->addressOffset = ctx_def.addr_offset();
  // Non-tail / tail transfer shape: outer count, inner count, inner length, strides.
  ctx->nonTailNumOutter = static_cast<uint16_t>(ctx_def.non_tail_num_outter());
  ctx->nonTailNumInner = static_cast<uint16_t>(ctx_def.non_tail_num_inner());
  ctx->nonTailLengthInner = ctx_def.non_tail_len_inner();
  ctx->nonTailStrideOutter = ctx_def.non_tail_stride_outter();
  ctx->nonTailStrideInner = ctx_def.non_tail_stride_inner();
  ctx->tailNumOutter = static_cast<uint16_t>(ctx_def.tail_num_outter());
  ctx->tailNumInner = static_cast<uint16_t>(ctx_def.tail_num_inner());
  ctx->tailLengthInner = ctx_def.tail_len_inner();
  ctx->tailStrideOutter = ctx_def.tail_stride_outter();
  ctx->tailStrideInner = ctx_def.tail_stride_inner();
  return SUCCESS;
}
// Fill a runtime AICPU context from its protobuf definition.
// Masks keep only the bit-width each hardware field occupies.
Status FftsPlusTaskInfo::InitAicpuCtx(const domi::FftsPlusAicpuCtxDef &ctx_def, rtFftsPlusAiCpuCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // 1 bit, 0000,0001
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  // The successor context-id list is fixed-size; reject any other length.
  if (ctx_def.successor_context_id_size() != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_context_id in FftsPlusAicpuCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_context_id_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_context_id in FftsPlusAicpuCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_context_id_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_SUCCESSOR_NUM; i++) {
    ctx->successorContextID[i] = static_cast<uint16_t>(ctx_def.successor_context_id(i));
  }
  ctx->atm = static_cast<uint16_t>(ctx_def.atm() & 0X00000001);  // 1 bit, 0000,0001
  ctx->sqeIndex = static_cast<uint16_t>(ctx_def.sqe_index());
  ctx->kernelType = static_cast<uint8_t>(ctx_def.kernel_type() & 0X0000007F);  // 7 bits, 0111,1111
  ctx->bm = static_cast<uint8_t>(ctx_def.bm() & 0X00000001);                   // 1 bit , 0000,0001
  ctx->topicType = static_cast<uint8_t>(ctx_def.topic_type() & 0X0000000F);    // 4 bits, 0000,1111
  ctx->qos = static_cast<uint8_t>(ctx_def.qos() & 0X00000007);                 // 3 bits, 0000,0111
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  ctx->nonTailBlockdim = static_cast<uint16_t>(ctx_def.non_tail_block_dim());
  ctx->tailBlockdim = static_cast<uint16_t>(ctx_def.tail_block_dim());
  // The user-data array is fixed-size; reject any other length.
  if (ctx_def.user_data_size() != kUserDataNum) {
    REPORT_INNER_ERROR("E19999", "Size of user_data in FftsPlusAicpuCtxDef should be %d, but %d exactly",
                       kUserDataNum, ctx_def.user_data_size());
    GELOGE(FAILED, "[Check][Param] Size of user_data in FftsPlusAicpuCtxDef should be %d, but %d exactly",
           kUserDataNum, ctx_def.user_data_size());
    return FAILED;
  }
  for (size_t i = 0; i < kUserDataNum; i++) {
    ctx->usrData[i] = static_cast<uint32_t>(ctx_def.user_data(i));
  }
  ctx->subtopicId = static_cast<uint32_t>(ctx_def.sub_topic_id() & 0X00000FFF);  // 12 bits, 1111,1111,1111
  ctx->topicId = static_cast<uint32_t>(ctx_def.topic_id() & 0X0000003F);         // 6 bits, 0011,1111
  ctx->groupId = static_cast<uint32_t>(ctx_def.group_id() & 0X0000003F);         // 6 bits, 0011,1111
  ctx->usrDataLength = static_cast<uint32_t>(ctx_def.user_data_len() & 0X000000FF);  // 8 bits, 1111,1111
  // Fix: taskParamOffset was assigned from qos() (copy-paste defect — qos is
  // already packed above); it must come from the task_param_offset field.
  ctx->taskParamOffset = ctx_def.task_param_offset();
  return SUCCESS;
}
// Dispatch a "load" hardware context to the initializer matching its software
// ctx type. ctx is the raw context slot, reinterpreted per software type.
Status FftsPlusTaskInfo::InitLoadCtx(const domi::FftsPlusCtxDef &ctx_def, uintptr_t &ctx) {
  switch (ctx_def.software_ctx_type()) {
    case RT_SOFT_CTX_TYPE_COND_SWITCH: {
      auto cond_switch_ctx = reinterpret_cast<rtFftsPlusCondSwitchCtx_t *>(ctx);
      GE_CHK_STATUS_RET_NOLOG(InitCondSwitchCtx(ctx_def.cond_switch_ctx(), cond_switch_ctx));
      break;
    }
    case RT_SOFT_CTX_TYPE_CASE_SWITCH: {
      // Exactly one of case_switch_ctx / case_default_ctx must be present.
      if (ctx_def.has_case_switch_ctx() == ctx_def.has_case_default_ctx()) {
        REPORT_INNER_ERROR("E19999", "case_switch_ctx %s and case_default_ctx %s when software ctx type is case",
                           ctx_def.has_case_switch_ctx() ? "exist" : "not exist",
                           ctx_def.has_case_default_ctx() ? "exist" : "not exist");
        GELOGE(FAILED, "[Check][Ctx] case_switch_ctx %s and case_default_ctx %s when software ctx type is case",
               ctx_def.has_case_switch_ctx() ? "exist" : "not exist",
               ctx_def.has_case_default_ctx() ? "exist" : "not exist");
        return FAILED;
      }
      if (ctx_def.has_case_switch_ctx()) {
        auto case_switch_ctx = reinterpret_cast<rtFftsPlusCaseSwitchCtx_t *>(ctx);
        GE_CHK_STATUS_RET_NOLOG(InitCaseSwitchCtx(ctx_def.case_switch_ctx(), case_switch_ctx));
      }
      if (ctx_def.has_case_default_ctx()) {
        auto case_default_ctx = reinterpret_cast<rtFftsPlusCaseDefCtx_t *>(ctx);
        GE_CHK_STATUS_RET_NOLOG(InitCaseDefaultCtx(ctx_def.case_default_ctx(), case_default_ctx));
      }
      break;
    }
    default:
      // Fix: report the software ctx type that failed the switch; the original
      // logged hardware_ctx_type(), which contradicted the message text.
      REPORT_INNER_ERROR("E19999", "Unsupported software ctx type %u when hardware ctx type is load",
                         ctx_def.software_ctx_type());
      GELOGE(FAILED, "[Check][CtxType] Unsupported software ctx type %u when hardware ctx type is load",
             ctx_def.software_ctx_type());
      return FAILED;
  }
  return SUCCESS;
}
// Fill a runtime cond-switch context from its protobuf definition.
// Packs the condition, both successor lists, the AXI read attributes and the two
// compare operands (loaded from load_addr0/1). Masks keep only the bit-width
// each hardware field occupies.
Status FftsPlusTaskInfo::InitCondSwitchCtx(const domi::FftsPlusCondSwitchCtxDef &ctx_def,
                                           rtFftsPlusCondSwitchCtx_t *&ctx) {
  ctx->trueSuccessorNum = static_cast<uint8_t>(ctx_def.true_successor_num());
  ctx->falseSuccessorNum = static_cast<uint8_t>(ctx_def.false_successor_num() & 0X0000007F);  // 7 bits, 0111,1111
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // 1 bit , 0000,0001
  // RT_COND_TYPE_MAX is the sentinel end of the condition enum, not a valid type.
  if (ctx_def.condition() == RT_COND_TYPE_MAX) {
    REPORT_INNER_ERROR("E19999", "Unsupported cond type %u", ctx_def.condition());
    GELOGE(FAILED, "[Check][CtxType] Unsupported cond type %u", ctx_def.condition());
    return FAILED;
  }
  ctx->condition = static_cast<rtFftsPlusCondType_t>(ctx_def.condition());
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  // Both successor lists are fixed-size in the hardware context.
  if (ctx_def.true_successor_list_size() != RT_CTX_TRUE_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of true_successor_list in FftsPlusCondSwitchCtxDef should be %d, but %d exactly",
                       RT_CTX_TRUE_SUCCESSOR_NUM, ctx_def.true_successor_list_size());
    GELOGE(FAILED,
           "[Check][Param] Size of true_successor_list in FftsPlusCondSwitchCtxDef should be %d, but %d exactly",
           RT_CTX_TRUE_SUCCESSOR_NUM, ctx_def.true_successor_list_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_TRUE_SUCCESSOR_NUM; i++) {
    ctx->trueSuccessorList[i] = static_cast<uint16_t>(ctx_def.true_successor_list(i));
  }
  if (ctx_def.false_successor_list_size() != RT_CTX_FALSE_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999",
                       "Size of false_successor_list in FftsPlusCondSwitchCtxDef should be %d, but %d exactly",
                       RT_CTX_FALSE_SUCCESSOR_NUM, ctx_def.false_successor_list_size());
    GELOGE(FAILED,
           "[Check][Param] Size of false_successor_list in FftsPlusCondSwitchCtxDef should be %d, but %d exactly",
           RT_CTX_FALSE_SUCCESSOR_NUM, ctx_def.false_successor_list_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_FALSE_SUCCESSOR_NUM; i++) {
    ctx->falseSuccessorList[i] = static_cast<uint16_t>(ctx_def.false_successor_list(i));
  }
  ctx->atm = static_cast<uint16_t>(ctx_def.atm() & 0X00000001);  // 1 bit, 0000,0001
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  // AXI read channel attributes for the operand loads.
  ctx->arSize = static_cast<uint8_t>(ctx_def.ar_size() & 0X00000007);  // 3 bits, 0000,0111
  ctx->snoop = static_cast<uint8_t>(ctx_def.snoop() & 0X00000001);  // 1 bit , 0000,0001
  ctx->arCache = static_cast<uint8_t>(ctx_def.ar_cache() & 0X0000000F);  // 4 bits, 0000,1111
  ctx->arProt = static_cast<uint8_t>(ctx_def.ar_prot() & 0X00000007);  // 3 bits, 0000,0111
  ctx->va = static_cast<uint8_t>(ctx_def.va() & 0X00000001);  // 1 bit , 0000,0001
  // Resolve both load base addresses from logic addresses and split into halves.
  const auto &rts_param = davinci_model_->GetRuntimeParam();
  uint8_t *addr_base_0 = nullptr;
  if (ModelUtils::GetRtAddress(rts_param, ctx_def.load_addr0_base(), addr_base_0) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed.");
    return INTERNAL_ERROR;
  }
  ctx->loadAddress0BaseL =
      static_cast<uint32_t>(reinterpret_cast<uintptr_t>(addr_base_0) & 0XFFFFFFFF);  // low 32 bits
  ctx->loadAddress0BaseH =
      static_cast<uint32_t>((reinterpret_cast<uintptr_t>(addr_base_0) >> 32) & 0X0001FFFF);  // high 17 bits
  ctx->ld0En = static_cast<uint32_t>(ctx_def.ld0_en() & 0X00000001);  // 1 bit , 0000,0001
  ctx->loadAddress0Offset = ctx_def.load_addr0_offset();
  uint8_t *addr_base_1 = nullptr;
  if (ModelUtils::GetRtAddress(rts_param, ctx_def.load_addr1_base(), addr_base_1) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed.");
    return INTERNAL_ERROR;
  }
  ctx->loadAddress1BaseL =
      static_cast<uint32_t>(reinterpret_cast<uintptr_t>(addr_base_1) & 0XFFFFFFFF);  // low 32 bits
  ctx->loadAddress1BaseH =
      static_cast<uint32_t>((reinterpret_cast<uintptr_t>(addr_base_1) >> 32) & 0X0001FFFF);  // high 17 bits
  ctx->ld1En = static_cast<uint32_t>(ctx_def.ld1_en() & 0X00000001);  // 1 bit , 0000,0001
  ctx->loadAddress1Offset = ctx_def.load_addr1_offset();
  // Immediate comparison values for the condition evaluation.
  ctx->cmpValue1 = ctx_def.cmp_value_1();
  ctx->cmpValue2 = ctx_def.cmp_value_2();
  return SUCCESS;
}
// Fill a runtime case-switch context from its protobuf definition.
// Packs the label range, successor list, AXI read attributes and the two load
// base addresses used to fetch the case selector.
Status FftsPlusTaskInfo::InitCaseSwitchCtx(const domi::FftsPlusCaseSwitchCtxDef &ctx_def,
                                           rtFftsPlusCaseSwitchCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // 1 bit , 0000,0001
  // Fix: startLabelId was copy-pasted from successor_num(); it must come from
  // the start_label_id field.
  ctx->startLabelId = static_cast<uint8_t>(ctx_def.start_label_id());
  ctx->labelListLen = static_cast<uint8_t>(ctx_def.label_list_len());
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  if (ctx_def.successor_list_size() != RT_CTX_SUCCESSOR_NUM) {
    // Fix: messages previously named FftsPlusCaseDefaultCtxDef; this initializer
    // handles FftsPlusCaseSwitchCtxDef.
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusCaseSwitchCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusCaseSwitchCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_SUCCESSOR_NUM; i++) {
    ctx->successorList[i] = static_cast<uint16_t>(ctx_def.successor_list(i));
  }
  ctx->atm = static_cast<uint8_t>(ctx_def.atm() & 0X00000001);  // 1 bit , 0000,0001
  ctx->threadId = static_cast<uint16_t>(ctx_def.thread_id());
  ctx->threadDim = static_cast<uint16_t>(ctx_def.thread_dim());
  // AXI read channel attributes for the selector loads.
  ctx->arSize = static_cast<uint8_t>(ctx_def.ar_size() & 0X00000007);  // 3 bits, 0000,0111
  ctx->snoop = static_cast<uint8_t>(ctx_def.snoop() & 0X00000001);  // 1 bit , 0000,0001
  ctx->arCache = static_cast<uint8_t>(ctx_def.ar_cache() & 0X0000000F);  // 4 bits, 0000,1111
  ctx->arProt = static_cast<uint8_t>(ctx_def.ar_prot() & 0X00000007);  // 3 bits, 0000,0111
  ctx->va = static_cast<uint8_t>(ctx_def.va() & 0X00000001);  // 1 bit , 0000,0001
  // Resolve both load base addresses from logic addresses and split into halves.
  const auto &rts_param = davinci_model_->GetRuntimeParam();
  uint8_t *addr_base_0 = nullptr;
  if (ModelUtils::GetRtAddress(rts_param, ctx_def.load_addr0_base(), addr_base_0) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed.");
    return INTERNAL_ERROR;
  }
  ctx->loadAddress0BaseL =
      static_cast<uint32_t>(reinterpret_cast<uintptr_t>(addr_base_0) & 0XFFFFFFFF);  // low 32 bits
  ctx->loadAddress0BaseH =
      static_cast<uint32_t>((reinterpret_cast<uintptr_t>(addr_base_0) >> 32) & 0X0001FFFF);  // high 17 bits
  ctx->ld0En = static_cast<uint32_t>(ctx_def.ld0_en() & 0X00000001);  // 1 bit , 0000,0001
  ctx->loadAddress0Offset = ctx_def.load_addr0_offset();
  uint8_t *addr_base_1 = nullptr;
  if (ModelUtils::GetRtAddress(rts_param, ctx_def.load_addr1_base(), addr_base_1) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed.");
    return INTERNAL_ERROR;
  }
  ctx->loadAddress1BaseL =
      static_cast<uint32_t>(reinterpret_cast<uintptr_t>(addr_base_1) & 0XFFFFFFFF);  // low 32 bits
  ctx->loadAddress1BaseH =
      static_cast<uint32_t>((reinterpret_cast<uintptr_t>(addr_base_1) >> 32) & 0X0001FFFF);  // high 17 bits
  ctx->ld1En = static_cast<uint32_t>(ctx_def.ld1_en() & 0X00000001);  // 1 bit , 0000,0001
  ctx->loadAddress1Offset = ctx_def.load_addr1_offset();
  return SUCCESS;
}
// Fill a runtime case-default context from its protobuf definition.
Status FftsPlusTaskInfo::InitCaseDefaultCtx(const domi::FftsPlusCaseDefaultCtxDef &ctx_def,
                                            rtFftsPlusCaseDefCtx_t *&ctx) {
  ctx->successorNum = static_cast<uint8_t>(ctx_def.successor_num());
  ctx->aten = static_cast<uint8_t>(ctx_def.aten() & 0X00000001);  // 1 bit , 0000,0001
  // Fix: startLabelId was copy-pasted from successor_num(); it must come from
  // the start_label_id field.
  ctx->startLabelId = static_cast<uint8_t>(ctx_def.start_label_id());
  ctx->labelListLen = static_cast<uint8_t>(ctx_def.label_list_len());
  ctx->predCntInit = static_cast<uint8_t>(ctx_def.pred_cnt_init());
  ctx->predCnt = static_cast<uint8_t>(ctx_def.pred_cnt());
  // The successor list is fixed-size in the hardware context; reject any other length.
  if (ctx_def.successor_list_size() != RT_CTX_SUCCESSOR_NUM) {
    REPORT_INNER_ERROR("E19999", "Size of successor_list in FftsPlusCaseDefaultCtxDef should be %d, but %d exactly",
                       RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    GELOGE(FAILED, "[Check][Param] Size of successor_list in FftsPlusCaseDefaultCtxDef should be %d, but %d exactly",
           RT_CTX_SUCCESSOR_NUM, ctx_def.successor_list_size());
    return FAILED;
  }
  for (size_t i = 0; i < RT_CTX_SUCCESSOR_NUM; i++) {
    ctx->successorList[i] = static_cast<uint16_t>(ctx_def.successor_list(i));
  }
  return SUCCESS;
}
// No-op for FFTS+ tasks: nothing is pre-calculated here (addresses are resolved
// during Init). Parameters are unused but required by the TaskInfo interface.
Status FftsPlusTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
  return SUCCESS;
}
// Refresh the device-side args buffer: re-map the collected IO addresses through
// the model's zero-copy table and copy the whole table down to args_.
Status FftsPlusTaskInfo::UpdateArgs() {
  GE_CHECK_NOTNULL(davinci_model_);
  std::vector<void *> refreshed_addrs = io_addrs_;
  davinci_model_->UpdateKnownZeroCopyAddr(refreshed_addrs);
  const auto copy_len = kAddrLen * refreshed_addrs.size();
  GE_CHK_RT_RET(rtMemcpy(args_, args_size_, refreshed_addrs.data(), copy_len, RT_MEMCPY_HOST_TO_DEVICE));
  return SUCCESS;
}
/// Launch the prepared FFTS+ task on the task's stream.
/// @return SUCCESS, or the mapped runtime error when the launch fails
Status FftsPlusTaskInfo::Distribute() {
  GELOGI("FftsPlusTaskInfo Distribute Start.");
  const rtError_t launch_ret = rtFftsPlusTaskLaunch(&ffts_plus_task_info_, stream_);
  if (launch_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "[Check][RT_ret] Call rtFftsPlusTaskLaunch failed, ret: 0x%X", launch_ret);
    return RT_ERROR_TO_GE_STATUS(launch_ret);
  }
  GELOGI("FftsPlusTaskInfo Distribute Success.");
  return SUCCESS;
}
// task_addr = {0,200,700,1000,2000, 3500} | |||
// task_addr_offset = {20,40,2,100,200} | |||
template <typename T> | |||
Status FftsPlusTaskInfo::InitIoAddrs(const RuntimeParam &rts_param, const T &ctx_def, uint32_t thread_id, | |||
uint32_t addr_count) { | |||
for (uint32_t i = 0; i < addr_count; ++i) { | |||
uintptr_t logic_addr = ctx_def.task_addr(i) + thread_id * ctx_def.task_addr_offset(i); | |||
uint8_t *io_addr = nullptr; | |||
if (ModelUtils::GetRtAddress(rts_param, logic_addr, io_addr) != SUCCESS) { | |||
GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress] GetRtAddress failed."); | |||
return INTERNAL_ERROR; | |||
} | |||
GELOGD("task base addr is %ld, offset is %ld, thread id is %d, logic addr is 0x%lx, io addr is %p", | |||
ctx_def.task_addr(i), ctx_def.task_addr_offset(i), thread_id, logic_addr, io_addr); | |||
io_addrs_.emplace_back(io_addr); | |||
} | |||
return SUCCESS; | |||
} | |||
REGISTER_TASK_INFO(RT_MODEL_TASK_FFTS_PLUS_TASK, FftsPlusTaskInfo); | |||
} // namespace ge |
@@ -0,0 +1,70 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FFTS_PLUS_TASK_INFO_H_ | |||
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FFTS_PLUS_TASK_INFO_H_ | |||
#include "graph/load/model_manager/task_info/task_info.h" | |||
#include "graph/op_desc.h" | |||
namespace ge { | |||
class FftsPlusTaskInfo : public TaskInfo { | |||
public: | |||
FftsPlusTaskInfo() = default; | |||
~FftsPlusTaskInfo() override; | |||
Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | |||
Status Distribute() override; | |||
Status UpdateArgs() override; | |||
Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; | |||
private: | |||
void InitFftsPlusSqe(const domi::FftsPlusSqeDef &sqe_def, rtFftsPlusSqe_t *&sqe); | |||
void InitFftsPlusSqeHeader(const domi::StarsSqeHeaderDef &sqe_header_def, rtStarsSqeHeader_t &sqe_header); | |||
Status InitFftsPlusCtx(const domi::FftsPlusTaskDef &task_def, size_t ctx_num, void *&ctx); | |||
Status InitAtStartCtx(const domi::FftsPlusAtStartCtxDef &ctx_def, rtFftsPlusAtStartCtx_t *&ctx); | |||
Status InitAtEndCtx(const domi::FftsPlusAtEndCtxDef &ctx_def, rtFftsPlusAtEndCtx_t *&ctx); | |||
Status InitLabelCtx(const domi::FftsPlusLabelCtxDef &ctx_def, rtFftsPlusLabelCtx_t *&ctx); | |||
Status InitHardWareCtx(const domi::FftsPlusCtxDef &ctx_def, uintptr_t &ctx); | |||
Status InitAicAivCtx(const domi::FftsPlusAicAivCtxDef &ctx_def, uint32_t op_index, rtFftsPlusAicAivCtx_t *&ctx); | |||
Status InitNotifyCtx(const domi::FftsPlusNotifyCtxDef &ctx_def, rtFftsPlusNotifyCtx_t *&ctx); | |||
Status InitWriteValueCtx(const domi::FftsPlusWriteValueCtxDef &ctx_def, rtFftsPlusWriteValueCtx_t *&ctx); | |||
Status InitMixAicAivCtx(const domi::FftsPlusMixAicAivCtxDef &ctx_def, uint32_t op_index, rtFftsPlusMixAicAivCtx_t *&ctx); | |||
Status InitSdmaCtx(const domi::FftsPlusSdmaCtxDef &ctx_def, rtFftsPlusSdmaCtx_t *&ctx); | |||
Status InitDataCtx(const domi::FftsPlusDataCtxDef &ctx_def, rtFftsPlusDataCtx_t *&ctx); | |||
Status InitAicpuCtx(const domi::FftsPlusAicpuCtxDef &ctx_def, rtFftsPlusAiCpuCtx_t *&ctx); | |||
Status InitLoadCtx(const domi::FftsPlusCtxDef &ctx_def, uintptr_t &ctx); | |||
Status InitCondSwitchCtx(const domi::FftsPlusCondSwitchCtxDef &ctx_def, rtFftsPlusCondSwitchCtx_t *&ctx); | |||
Status InitCaseSwitchCtx(const domi::FftsPlusCaseSwitchCtxDef &ctx_def, rtFftsPlusCaseSwitchCtx_t *&ctx); | |||
Status InitCaseDefaultCtx(const domi::FftsPlusCaseDefaultCtxDef &ctx_def, rtFftsPlusCaseDefCtx_t *&ctx); | |||
template<typename T> | |||
Status InitIoAddrs(const RuntimeParam &rts_param, const T &aic_aiv_def, uint32_t thread_id, uint32_t addr_count); | |||
DavinciModel *davinci_model_{nullptr}; | |||
rtFftsPlusTaskInfo_t ffts_plus_task_info_; | |||
std::vector<void *> io_addrs_; | |||
void *args_{nullptr}; // runtime args memory | |||
uint32_t args_size_{0}; // runtime args memory length | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FFTS_PLUS_TASK_INFO_H_ |
@@ -26,8 +26,8 @@ | |||
#include "external/graph/attr_value.h" | |||
#include "graph/load/model_manager/davinci_model.h" | |||
#include "graph/load/model_manager/model_manager.h" | |||
#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | |||
#include "framework/common/debug/log.h" | |||
#include "runtime/rt.h" | |||
namespace { | |||
const char *const kAicpuAllshape = "_AllShape"; | |||
@@ -43,7 +43,7 @@ Status KernelExTaskInfo::InitTaskExtInfo(const std::string &ext_info, const OpDe | |||
UnknowShapeOpType unknown_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val); | |||
uint32_t num_inputs = op_desc->GetInputsSize(); | |||
uint32_t num_outputs = op_desc->GetOutputsSize(); | |||
std::unique_ptr<ge::hybrid::AicpuExtInfoHandler> ext_handle( | |||
std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> ext_handle( | |||
new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc->GetName(), | |||
num_inputs, | |||
num_outputs, | |||
@@ -76,6 +76,16 @@ Status KernelExTaskInfo::InitTaskExtInfo(const std::string &ext_info, const OpDe | |||
} | |||
} | |||
} | |||
AttrUtils::GetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_); | |||
GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc->GetName().c_str(), is_blocking_aicpu_op_); | |||
if (UpdateEventIdForAicpuBlockingOp(op_desc, ext_handle) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][UpdateEventIdForAicpuBlockingOp] failed for op:%s(%s)", | |||
op_desc->GetName().c_str(), op_desc->GetType().c_str()); | |||
return FAILED; | |||
} | |||
auto rt_ret = rtMalloc(&ext_info_addr_, ext_handle->GetExtInfoLen(), RT_MEMORY_HBM); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", ext_info.size(), rt_ret); | |||
@@ -448,6 +458,101 @@ Status KernelExTaskInfo::Distribute() { | |||
stream_id_ = stream_id; | |||
GELOGI("KernelExTaskInfo Distribute Success. task id: %u, stream id: %u", task_id_, stream_id_); | |||
if (is_blocking_aicpu_op_) { | |||
if (DistributeWaitTaskForAicpuBlockingOp() != SUCCESS) { | |||
GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); | |||
return FAILED; | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
/// Query whether the current device supports the blocking AICPU operator protocol.
/// @param is_support  set to true iff the device reports RT_AICPU_BLOCKING_OP_SUPPORT
/// @return SUCCESS, a mapped runtime error, or FAILED on an out-of-range capability value
Status KernelExTaskInfo::CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support) {
  int32_t device_id = 0;
  auto rt_ret = rtGetDevice(&device_id);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][rtGetDevice] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  int32_t value = 0;
  rt_ret = rtGetDeviceCapability(device_id, FEATURE_TYPE_BLOCKING_OPERATOR, RT_MODULE_TYPE_AICPU, &value);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtGetDeviceCapability failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][rtGetDeviceCapability] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  // The capability must be one of the two defined values; anything else is a protocol error.
  if (value != RT_AICPU_BLOCKING_OP_NOT_SUPPORT && value != RT_AICPU_BLOCKING_OP_SUPPORT) {
    REPORT_INNER_ERROR("E19999", "Value should be %d or %d but %d",
                       RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
    GELOGE(FAILED, "[Check][Value] Value should be %d or %d but %d",
           RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
    return FAILED;
  }
  // Idiom fix: the comparison already yields bool; no ternary needed.
  is_support = (value == RT_AICPU_BLOCKING_OP_SUPPORT);
  return SUCCESS;
}
/// For a blocking AICPU op, fetch the per-stream event id from the model and
/// write it into the op's ext-info blob; no-op for non-blocking ops or devices
/// without blocking-op support.
Status KernelExTaskInfo::UpdateEventIdForAicpuBlockingOp(const OpDescPtr &op_desc,
                                                         std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> &ext_handle) {
  if (!is_blocking_aicpu_op_) {
    return SUCCESS;
  }
  bool is_support = false;
  if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
    GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
    return FAILED;
  }
  if (!is_support) {
    GELOGD("Device not support blocking aicpu op process");
    return SUCCESS;
  }
  uint32_t event_id = 0;
  const auto get_ret = davinci_model_->GetEventIdForBlockingAicpuOp(op_desc, stream_, event_id);
  if (get_ret != SUCCESS) {
    REPORT_CALL_ERROR("E19999", "Get event id failed for op:%s(%s).", op_desc->GetName().c_str(),
                      op_desc->GetType().c_str());
    GELOGE(FAILED, "[Get][EventId] Get event id failed for op:%s(%s)", op_desc->GetName().c_str(),
           op_desc->GetType().c_str());
    return FAILED;
  }
  if (ext_handle->UpdateEventId(event_id) != SUCCESS) {
    REPORT_CALL_ERROR("E19999", "Update event id failed for op:%s(%s).", op_desc->GetName().c_str(),
                      op_desc->GetType().c_str());
    GELOGE(FAILED, "[Update][EventId] Update event id failed for op:%s(%s)", op_desc->GetName().c_str(),
           op_desc->GetType().c_str());
    return FAILED;
  }
  GELOGI("Update event_id=%u success", event_id);
  return SUCCESS;
}
/// Append a wait-on-event task to the stream for a blocking AICPU op, then
/// reset the event so it can be reused; no-op when the device lacks support.
Status KernelExTaskInfo::DistributeWaitTaskForAicpuBlockingOp() {
  bool is_support = false;
  if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
    GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
    return FAILED;
  }
  if (!is_support) {
    GELOGD("Device not support blocking aicpu op process.");
    return SUCCESS;
  }
  GELOGD("Distribute wait task begin");
  rtEvent_t block_event = nullptr;
  if (davinci_model_->GetEventByStream(stream_, block_event) != SUCCESS) {
    GELOGE(FAILED, "[Call][GetEventByStream] Call GetEventByStream failed");
    return FAILED;
  }
  rtError_t rt_ret = rtStreamWaitEvent(stream_, block_event);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  rt_ret = rtEventReset(block_event, stream_);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  return SUCCESS;
}
@@ -19,6 +19,7 @@ | |||
#include "graph/load/model_manager/task_info/task_info.h" | |||
#include "graph/op_desc.h" | |||
#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | |||
namespace ge { | |||
class KernelExTaskInfo : public TaskInfo { | |||
@@ -65,6 +66,12 @@ class KernelExTaskInfo : public TaskInfo { | |||
void InitDumpArgs(void *addr, const OpDescPtr &op_desc); | |||
Status InitTaskExtInfo(const std::string &ext_info, const OpDescPtr &op_desc); | |||
// for blocking aicpu op | |||
Status DistributeWaitTaskForAicpuBlockingOp(); | |||
Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support); | |||
Status UpdateEventIdForAicpuBlockingOp(const OpDescPtr &op_desc, | |||
std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> &ext_handle); | |||
uint32_t task_id_; | |||
uint32_t stream_id_; | |||
uint32_t dump_flag_; | |||
@@ -79,6 +86,7 @@ class KernelExTaskInfo : public TaskInfo { | |||
uint32_t args_offset_ = 0; | |||
int64_t fixed_addr_offset_ = 0; | |||
int32_t topic_type_flag_ = -1; | |||
bool is_blocking_aicpu_op_ = false; | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_ |
@@ -28,11 +28,10 @@ | |||
#include "graph/load/model_manager/davinci_model.h" | |||
#include "graph/load/model_manager/model_manager.h" | |||
#include "graph/load/model_manager/model_utils.h" | |||
#include "runtime/kernel.h" | |||
#include "runtime/rt.h" | |||
#include "graph/load/model_manager/task_info/super_kernel/super_kernel.h" | |||
#include "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.h" | |||
#include "cce/aicpu_engine_struct.h" | |||
#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | |||
#include "framework/common/debug/log.h" | |||
namespace { | |||
@@ -474,6 +473,12 @@ Status KernelTaskInfo::Distribute() { | |||
} | |||
// set for task_id_ | |||
UpdateTaskId(); | |||
if (is_blocking_aicpu_op_) { | |||
if (DistributeWaitTaskForAicpuBlockingOp() != SUCCESS) { | |||
GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); | |||
return FAILED; | |||
} | |||
} | |||
GELOGD( | |||
"KernelTaskInfo Distribute Success. sktenable:%d taskid:%d sktid:%d stubfunc_name:%s stubfunc:%p " | |||
"blockdim:%d stream:%p", | |||
@@ -482,6 +487,91 @@ Status KernelTaskInfo::Distribute() { | |||
return SUCCESS; | |||
} | |||
/// Query whether the current device supports the blocking AICPU operator protocol.
/// @param is_support  set to true iff the device reports RT_AICPU_BLOCKING_OP_SUPPORT
/// @return SUCCESS, a mapped runtime error, or FAILED on an out-of-range capability value
Status KernelTaskInfo::CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support) {
  int32_t device_id = 0;
  auto rt_ret = rtGetDevice(&device_id);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][rtGetDevice] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  int32_t value = 0;
  rt_ret = rtGetDeviceCapability(device_id, FEATURE_TYPE_BLOCKING_OPERATOR, RT_MODULE_TYPE_AICPU, &value);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtGetDeviceCapability failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][rtGetDeviceCapability] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  // The capability must be one of the two defined values; anything else is a protocol error.
  if (value != RT_AICPU_BLOCKING_OP_NOT_SUPPORT && value != RT_AICPU_BLOCKING_OP_SUPPORT) {
    REPORT_INNER_ERROR("E19999", "Value should be %d or %d but %d",
                       RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
    GELOGE(FAILED, "[Check][Value] Value should be %d or %d but %d",
           RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
    return FAILED;
  }
  // Idiom fix: the comparison already yields bool; no ternary needed.
  is_support = (value == RT_AICPU_BLOCKING_OP_SUPPORT);
  return SUCCESS;
}
/// For a blocking AICPU op, fetch the per-stream event id from the model and
/// write it into the op's ext-info blob; no-op for non-blocking ops or devices
/// without blocking-op support.
Status KernelTaskInfo::UpdateEventIdForAicpuBlockingOp(std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> &ext_handle) {
  if (is_blocking_aicpu_op_) {
    bool is_support = false;
    if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
      GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
      return FAILED;
    }
    if (!is_support) {
      GELOGD("Device not support blocking aicpu op process");
      return SUCCESS;
    }
    uint32_t event_id = 0;
    if (davinci_model_->GetEventIdForBlockingAicpuOp(op_desc_, stream_, event_id) != SUCCESS) {
      // Consistency fix: report the error like KernelExTaskInfo::UpdateEventIdForAicpuBlockingOp does.
      REPORT_CALL_ERROR("E19999", "Get event id failed for op:%s(%s).", op_desc_->GetName().c_str(),
                        op_desc_->GetType().c_str());
      GELOGE(FAILED, "[Get][EventId] Get event id failed for op:%s(%s)", op_desc_->GetName().c_str(),
             op_desc_->GetType().c_str());
      return FAILED;
    }
    if (ext_handle->UpdateEventId(event_id) != SUCCESS) {
      REPORT_CALL_ERROR("E19999", "Update event id failed for op:%s(%s).", op_desc_->GetName().c_str(),
                        op_desc_->GetType().c_str());
      GELOGE(FAILED, "[Update][EventId] Update event id failed for op:%s(%s)", op_desc_->GetName().c_str(),
             op_desc_->GetType().c_str());
      return FAILED;
    }
    GELOGI("Update event_id=%u success", event_id);
  }
  return SUCCESS;
}
/// Append a wait-on-event task to the stream for a blocking AICPU op, then
/// reset the event so it can be reused; no-op when the device lacks support.
Status KernelTaskInfo::DistributeWaitTaskForAicpuBlockingOp() {
  bool is_support = false;
  if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
    GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
    return FAILED;
  }
  if (!is_support) {
    GELOGD("device not support blocking aicpu op process.");
    return SUCCESS;
  }
  GELOGD("Distribute wait task begin");
  rtEvent_t block_event = nullptr;
  if (davinci_model_->GetEventByStream(stream_, block_event) != SUCCESS) {
    REPORT_CALL_ERROR("E19999", "Call GetEventByStream failed");
    GELOGE(FAILED, "[Call][GetEventByStream] Call GetEventByStream failed");
    return FAILED;
  }
  rtError_t rt_ret = rtStreamWaitEvent(stream_, block_event);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  rt_ret = rtEventReset(block_event, stream_);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  return SUCCESS;
}
void KernelTaskInfo::SetIoAddrs(const OpDescPtr &op_desc) { | |||
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | |||
vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); | |||
@@ -1109,7 +1199,7 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { | |||
UnknowShapeOpType unknown_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val); | |||
uint32_t num_inputs = op_desc_->GetInputsSize(); | |||
uint32_t num_outputs = op_desc_->GetOutputsSize(); | |||
std::unique_ptr<ge::hybrid::AicpuExtInfoHandler> ext_handle( | |||
std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> ext_handle( | |||
new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc_->GetName(), | |||
num_inputs, | |||
num_outputs, | |||
@@ -1145,6 +1235,16 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { | |||
j, op_desc_->GetName().c_str()); | |||
} | |||
} | |||
AttrUtils::GetBool(op_desc_, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_); | |||
GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc_->GetName().c_str(), is_blocking_aicpu_op_); | |||
if (UpdateEventIdForAicpuBlockingOp(ext_handle) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][UpdateEventIdForAicpuBlockingOp] failed for op:%s(%s)", | |||
op_desc_->GetName().c_str(), op_desc_->GetType().c_str()); | |||
return FAILED; | |||
} | |||
auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_handle->GetExtInfoLen(), RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%zu, ret:0x%X", | |||
@@ -24,6 +24,8 @@ | |||
#include "graph/load/model_manager/task_info/task_info.h" | |||
#include "graph/op_desc.h" | |||
#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | |||
namespace ge { | |||
class KernelTaskInfo : public TaskInfo { | |||
public: | |||
@@ -148,6 +150,11 @@ class KernelTaskInfo : public TaskInfo { | |||
bool DoubleCallSKTSaveCheck(); | |||
void SetArgs(); | |||
// for blocking aicpu op | |||
Status DistributeWaitTaskForAicpuBlockingOp(); | |||
Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support); | |||
Status UpdateEventIdForAicpuBlockingOp(std::shared_ptr<ge::hybrid::AicpuExtInfoHandler> &ext_handle); | |||
void *stub_func_; | |||
void *args_; | |||
void *sm_desc_; | |||
@@ -187,6 +194,7 @@ class KernelTaskInfo : public TaskInfo { | |||
uint32_t skt_dump_flag_ = RT_KERNEL_DEFAULT; | |||
void *superkernel_device_args_addr_ = nullptr; | |||
void *superkernel_dev_nav_table_ = nullptr; | |||
bool is_blocking_aicpu_op_ = false; | |||
struct AICPUCustomInfo { | |||
void *input_descs = nullptr; | |||
@@ -808,6 +808,14 @@ Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_gr | |||
GELOGE(ret, "[Call][OptimizeSubGraphWithMultiThreads] failed, ret:%d, session_id:%lu", ret, session_id); | |||
return ret; | |||
} | |||
for (const auto &item : sub_graph_map) { | |||
for (const auto &subgraph_info : item.second) { | |||
const auto &subgraph = subgraph_info->GetSubGraph(); | |||
for (const auto &new_graph : subgraph->GetAllSubgraphs()) { | |||
compute_graph->AddSubGraph(new_graph); | |||
} | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -881,8 +889,8 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, | |||
CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); | |||
GM_RUN_AND_DUMP_PERF("OptimizeWholeGraph", stages.optimizer.OptimizeWholeGraph, compute_graph); | |||
GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); | |||
GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", | |||
GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts, | |||
GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuild", | |||
GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuild, | |||
compute_graph); | |||
Status ret = compute_graph->TopologicalSorting(); | |||
@@ -1381,8 +1389,8 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTen | |||
ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id); | |||
graph_node->SetRunFlag(false); | |||
if (ret != SUCCESS) { | |||
GELOGE(GE_GRAPH_PRERUN_FAILED, "[Call][StartForRunGraph] failed! graph_id:%u.", graph_id); | |||
return GE_GRAPH_PRERUN_FAILED; | |||
GELOGE(ret, "[Call][StartForRunGraph] failed! graph_id:%u.", graph_id); | |||
return ret; | |||
} | |||
GELOGI("[BuildGraph] build graph success, graph_id=%u.", graph_id); | |||
@@ -2837,20 +2845,59 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra | |||
GELOGE(ret, "[Call][Partition] for Graph:%s by dynamic shape Failed", compute_graph->GetName().c_str()); | |||
return ret; | |||
} | |||
bool dynamic_shape_partitioned = false; | |||
if (!AttrUtils::GetBool(*compute_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, dynamic_shape_partitioned)) { | |||
REPORT_INNER_ERROR("E19999", "Get Attr:%s from graph:%s(id:%u) fail", | |||
ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetName().c_str(), | |||
compute_graph->GetGraphID()); | |||
GELOGE(FAILED, "[Get][Attr] %s from graph:%u failed", | |||
ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetGraphID()); | |||
if (!compute_graph->HasAttr(ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED)) { | |||
REPORT_INNER_ERROR("E19999", "Attr:%s not exist in graph:%s(id:%u)", ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), | |||
compute_graph->GetName().c_str(), compute_graph->GetGraphID()); | |||
GELOGE(FAILED, "[Get][Attr] Attr %s not exist in graph:%u", ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), | |||
compute_graph->GetGraphID()); | |||
return FAILED; | |||
} | |||
GE_TIMESTAMP_EVENT_END(GraphPartitionDynamicShape, "OptimizeSubgraph::GraphPartitionDynamicShape"); | |||
GE_DUMP(compute_graph, "AfterDynamicShapePartition"); | |||
GE_TIMESTAMP_START(SubgraphPartitionAndOptimization_CompoundEngine); | |||
ret = SubgraphPartitionAndOptimization(graph_node, compute_graph, session_id, | |||
GraphPartitioner::kCompoundEnginePartitioning); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[SubgraphPartitionAndOptimization][CompoundEngine] for graph:%s failed", | |||
compute_graph->GetName().c_str()); | |||
return ret; | |||
} | |||
GE_TIMESTAMP_EVENT_END(SubgraphPartitionAndOptimization_CompoundEngine, | |||
"OptimizeSubgraph::SubgraphPartitionAndOptimization::CompoundEngine"); | |||
GE_DUMP(compute_graph, "MergedComputeGraphAfterCompoundEnginePartition"); | |||
GE_TIMESTAMP_START(SubgraphPartitionAndOptimization_AtomicEngine); | |||
ret = SubgraphPartitionAndOptimization(graph_node, compute_graph, session_id, | |||
GraphPartitioner::kAtomicEnginePartitioning); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[SubgraphPartitionAndOptimization][AtomicEngine] for graph:%s failed", | |||
compute_graph->GetName().c_str()); | |||
return ret; | |||
} | |||
GE_TIMESTAMP_EVENT_END(SubgraphPartitionAndOptimization_AtomicEngine, | |||
"OptimizeSubgraph::SubgraphPartitionAndOptimization::AtomicEngine"); | |||
GE_DUMP(compute_graph, "MergedComputeGraphAfterAtomicEnginePartition"); | |||
return SUCCESS; | |||
} | |||
Status GraphManager::SubgraphPartitionAndOptimization(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, | |||
uint64_t session_id, GraphPartitioner::Mode mode) { | |||
std::shared_ptr<GELib> instance_ptr = GELib::GetInstance(); | |||
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
REPORT_INNER_ERROR("E19999", "GELib instance is nullptr or it is not InitFlag, check invalid."); | |||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Get][GELib] Run enginePlacer failed, because GELib is invalid."); | |||
return FAILED; | |||
} | |||
if ((mode == GraphPartitioner::kCompoundEnginePartitioning) && | |||
instance_ptr->OpsKernelManagerObj().GetCompoundEngineContains().empty()) { | |||
GELOGI("No compound engine registers, ignore subgraph partition and optimization for compound engine"); | |||
return SUCCESS; | |||
} | |||
GE_TIMESTAMP_START(GraphPartition); | |||
GraphPartitioner &partitioner = GetCompilerStages(graph_node->GetGraphId()).partitioner; | |||
ret = partitioner.Partition(compute_graph, GraphPartitioner::kPartitioning); | |||
Status ret = partitioner.Partition(compute_graph, mode); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[Call][Partition] for Graph:%s Failed", compute_graph->GetName().c_str()); | |||
return ret; | |||
@@ -2863,24 +2910,24 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra | |||
return ret; | |||
} | |||
GE_TIMESTAMP_EVENT_END(SetSubgraph, "OptimizeSubgraph::SetSubGraph"); | |||
std::set<string> build_steps = {BUILD_STEP_BEFORE_UB_MATCH, BUILD_STEP_AFTER_BUILDER, BUILD_STEP_AFTER_BUILDER_SUB}; | |||
if ((options_.build_mode == BUILD_MODE_TUNING) && (build_steps.count(options_.build_step) > 0)) { | |||
GE_TIMESTAMP_START(ConvertGraphToFile); | |||
std::string tuning_path; | |||
(void) GetContext().GetOption(TUNING_PATH, tuning_path); | |||
Status ret = ConvertGraphToFile(compute_graph, partitioner, tuning_path, | |||
(options_.build_step == BUILD_STEP_AFTER_BUILDER)); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[Convert][Graph] [%s] to file failed", compute_graph->GetName().c_str()); | |||
return ret; | |||
if (mode == GraphPartitioner::kAtomicEnginePartitioning) { | |||
std::set<string> build_steps = {BUILD_STEP_BEFORE_UB_MATCH, BUILD_STEP_AFTER_BUILDER, BUILD_STEP_AFTER_BUILDER_SUB}; | |||
if ((options_.build_mode == BUILD_MODE_TUNING) && (build_steps.count(options_.build_step) > 0)) { | |||
GE_TIMESTAMP_START(ConvertGraphToFile); | |||
std::string tuning_path; | |||
(void) GetContext().GetOption(TUNING_PATH, tuning_path); | |||
Status ret = ConvertGraphToFile(compute_graph, partitioner, tuning_path, | |||
(options_.build_step == BUILD_STEP_AFTER_BUILDER)); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[Convert][Graph] [%s] to file failed", compute_graph->GetName().c_str()); | |||
return ret; | |||
} | |||
GE_TIMESTAMP_EVENT_END(ConvertGraphToFile, "OptimizeSubgraph::ConvertGraphToFile"); | |||
return SUCCESS; | |||
} | |||
GE_TIMESTAMP_EVENT_END(ConvertGraphToFile, "OptimizeSubgraph::ConvertGraphToFile"); | |||
return SUCCESS; | |||
} | |||
ComputeGraphPtr merged_compute_graph = nullptr; | |||
std::vector<ComputeGraphPtr> merged_sub_graph_list; | |||
GE_TIMESTAMP_START(MergeSubgraph); | |||
ret = MergeSubGraph(merged_compute_graph, compute_graph, graph_node->GetGraphId()); | |||
if (ret != SUCCESS) { | |||
@@ -2896,27 +2943,31 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra | |||
sub_graph->SetSessionID(session_id); | |||
sub_graph->SetGraphID(graph_node->GetGraphId()); | |||
} | |||
bool off_superkernel = false; | |||
if (AttrUtils::GetBool(compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) { | |||
GELOGI("Compute graph %s get superkernel flag %d.", compute_graph->GetName().c_str(), off_superkernel); | |||
if (!AttrUtils::SetBool(merged_compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) { | |||
REPORT_INNER_ERROR("E19999", "Set Attr:%s to graph:%u fail", | |||
bool off_super_kernel = false; | |||
if (AttrUtils::GetBool(compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_super_kernel)) { | |||
GELOGI("Compute graph %s get super kernel flag %d.", compute_graph->GetName().c_str(), off_super_kernel); | |||
if (!AttrUtils::SetBool(merged_compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_super_kernel)) { | |||
REPORT_INNER_ERROR("E19999", "Set Attr:%s to graph:%u failed", | |||
ATTR_NAME_OFF_SUPERKERNEL_ATTR.c_str(), compute_graph->GetGraphID()); | |||
GELOGE(FAILED, "[Set][Attr] %s to graph:%u fail", | |||
GELOGE(FAILED, "[Set][Attr] %s to graph:%u failed", | |||
ATTR_NAME_OFF_SUPERKERNEL_ATTR.c_str(), compute_graph->GetGraphID()); | |||
return FAILED; | |||
} | |||
} | |||
bool dynamic_shape_partitioned = false; | |||
if (AttrUtils::GetBool(compute_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, dynamic_shape_partitioned)) { | |||
GELOGI("Compute graph %s get super kernel flag %d.", compute_graph->GetName().c_str(), dynamic_shape_partitioned); | |||
if (!AttrUtils::SetBool(merged_compute_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, dynamic_shape_partitioned)) { | |||
REPORT_INNER_ERROR("E19999", "Set Attr:%s to graph:%u failed", | |||
ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetGraphID()); | |||
GELOGE(FAILED, "[Set][Attr] %s to graph:%u failed", | |||
ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetGraphID()); | |||
return FAILED; | |||
} | |||
} | |||
GE_TIMESTAMP_EVENT_END(MergeSubgraph, "OptimizeSubgraph::MergeSubGraph"); | |||
GE_DUMP(merged_compute_graph, "mergedComputeGraph"); | |||
compute_graph = merged_compute_graph; | |||
if (!AttrUtils::SetBool(*compute_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, dynamic_shape_partitioned)) { | |||
REPORT_INNER_ERROR("E19999", "Set Attr:%s to graph:%u fail", | |||
ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetGraphID()); | |||
GELOGE(FAILED, "[Set][Attr] %s to graph:%u fail", | |||
ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetGraphID()); | |||
return FAILED; | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -243,6 +243,9 @@ class GraphManager { | |||
Status OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, uint64_t session_id); | |||
Status SubgraphPartitionAndOptimization(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, | |||
uint64_t session_id, GraphPartitioner::Mode mode); | |||
Status Build(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, | |||
GeRootModelPtr &ge_root_model, uint64_t session_id); | |||
@@ -17,10 +17,10 @@ | |||
#include "graph/optimize/graph_optimize.h" | |||
#include "graph/ge_context.h" | |||
#include "common/local_context.h" | |||
#include "graph/passes/dimension_adjust_pass.h" | |||
#include "inc/pass_manager.h" | |||
#include "init/gelib.h" | |||
#include "graph/partition/engine_place.h" | |||
namespace { | |||
const char *const kVectorCore = "VectorCore"; | |||
@@ -85,9 +85,6 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std | |||
return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; | |||
} | |||
Status ret = SUCCESS; | |||
vector<GraphOptimizerPtr> graph_optimizer; | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
REPORT_INNER_ERROR("E19999", "Gelib not init before, check invalid, graph:%s", | |||
@@ -96,7 +93,7 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std | |||
compute_graph->GetName().c_str()); | |||
return GE_CLI_GE_NOT_INITIALIZED; | |||
} | |||
vector<GraphOptimizerPtr> graph_optimizer; | |||
if (instance_ptr->DNNEngineManagerObj().IsEngineRegistered(engine_name)) { | |||
instance_ptr->OpsKernelManagerObj().GetGraphOptimizerByEngine(engine_name, graph_optimizer); | |||
AddNodeInputProperty(compute_graph); | |||
@@ -123,7 +120,7 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std | |||
} | |||
for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) { | |||
ret = (*iter)->OptimizeFusedGraph(*(compute_graph)); | |||
Status ret = (*iter)->OptimizeFusedGraph(*(compute_graph)); | |||
if (ret != SUCCESS) { | |||
REPORT_INNER_ERROR("E19999", "Call OptimizeFusedGraph failed, ret:%d, engine_name:%s, " | |||
"graph_name:%s", ret, engine_name.c_str(), | |||
@@ -137,7 +134,7 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std | |||
GELOGI("Engine: %s is not registered. do nothing in subGraph Optimize by ATC.", engine_name.c_str()); | |||
} | |||
return ret; | |||
return SUCCESS; | |||
} | |||
Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) { | |||
@@ -269,13 +266,27 @@ Status GraphOptimize::OptimizeOriginalGraphForQuantize(ComputeGraphPtr &compute_ | |||
return ret; | |||
} | |||
Status GraphOptimize::OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_graph) { | |||
Status GraphOptimize::OptimizeGraphBeforeBuild(ComputeGraphPtr &compute_graph) { | |||
if (compute_graph == nullptr) { | |||
REPORT_INNER_ERROR("E19999", "Param compute_graph is nullptr, check invalid"); | |||
GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[Check][Param] compute_graph is nullptr."); | |||
return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; | |||
} | |||
EnginePlacer engine_place(compute_graph); | |||
Status ret = engine_place.Run(); | |||
if (ret != SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Assign atomic engine for graph %s failed", compute_graph->GetName().c_str()); | |||
GELOGE(ret, "[Assign][Engine] Assign atomic engine for graph %s failed", compute_graph->GetName().c_str()); | |||
return ret; | |||
} | |||
ret = engine_place.AssignCompoundEngine(); | |||
if (ret != SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Assign compound engine for graph %s failed", compute_graph->GetName().c_str()); | |||
GELOGE(ret, "[Assign][Engine] Assign compound engine for graph %s failed", compute_graph->GetName().c_str()); | |||
return ret; | |||
} | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
REPORT_INNER_ERROR("E19999", "Gelib not init before, check invalid, graph:%s.", | |||
@@ -284,13 +295,11 @@ Status GraphOptimize::OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_gr | |||
compute_graph->GetName().c_str()); | |||
return GE_CLI_GE_NOT_INITIALIZED; | |||
} | |||
auto graph_optimizer = instance_ptr->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority(); | |||
GELOGD("optimize by opskernel in graph optimize before build phase. num of graph_optimizer is %zu.", | |||
graph_optimizer.size()); | |||
Status ret = SUCCESS; | |||
string exclude_core_Type = (core_type_ == kVectorCore) ? kAicoreEngine : kVectorEngine; | |||
GELOGD("[OptimizeGraphBeforeBuildForRts]: engine type will exclude: %s, core_type_: %s", | |||
GELOGD("[OptimizeGraphBeforeBuild]: engine type will exclude: %s, core_type_: %s", | |||
exclude_core_Type.c_str(), core_type_.c_str()); | |||
if (graph_optimizer.size() != 0) { | |||
for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) { | |||
@@ -308,7 +317,7 @@ Status GraphOptimize::OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_gr | |||
} | |||
} | |||
} | |||
return ret; | |||
return SUCCESS; | |||
} | |||
Status GraphOptimize::OptimizeAfterStage1(ComputeGraphPtr &compute_graph) { | |||
@@ -55,8 +55,8 @@ class GraphOptimize { | |||
// for engine to optimize merged whole graph before ge Optimize2 | |||
Status OptimizeWholeGraph(ComputeGraphPtr &compute_graph); | |||
// for rts optimize before build to add attr and insert memcpy op | |||
Status OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_graph); | |||
// for optimize before build | |||
Status OptimizeGraphBeforeBuild(ComputeGraphPtr &compute_graph); | |||
// optimize whole graph, using after stage1 | |||
Status OptimizeAfterStage1(ComputeGraphPtr &graph); | |||
@@ -16,19 +16,12 @@ | |||
#include "graph/partition/engine_place.h" | |||
#include <climits> | |||
#include <memory> | |||
#include <string> | |||
#include <utility> | |||
#include <mutex> | |||
#include "framework/common/op/ge_op_utils.h" | |||
#include "common/util/error_manager/error_manager.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/utils/op_desc_utils.h" | |||
#include "init/gelib.h" | |||
#include "opskernel_manager/ops_kernel_manager.h" | |||
#include "analyzer/analyzer.h" | |||
namespace ge { | |||
namespace { | |||
@@ -40,7 +33,7 @@ Status EnginePlacer::Check() const { | |||
GELOGE(GE_GRAPH_NULL_INPUT, "[Check][Param] compute_graph_ is nullptr."); | |||
return FAILED; | |||
} | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
std::shared_ptr<GELib> instance_ptr = GELib::GetInstance(); | |||
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
REPORT_INNER_ERROR("E19999", "GELib instance is nullptr or it is not InitFlag, check invalid."); | |||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Get][GELib] Run enginePlacer failed, because GELib is invalid."); | |||
@@ -49,7 +42,7 @@ Status EnginePlacer::Check() const { | |||
return SUCCESS; | |||
} | |||
Status EnginePlacer::Run() { | |||
Status EnginePlacer::Run(bool direct_node_flag) { | |||
std::lock_guard<std::mutex> lock(check_support_cost_mutex); | |||
GELOGD("Engine placer starts."); | |||
@@ -58,8 +51,8 @@ Status EnginePlacer::Run() { | |||
} | |||
bool is_check_support_success = true; | |||
// Assign engine for each node in the graph | |||
ge::GELib::GetInstance()->DNNEngineManagerObj().InitPerformanceStaistic(); | |||
for (const auto &node_ptr : compute_graph_->GetDirectNode()) { | |||
GELib::GetInstance()->DNNEngineManagerObj().InitPerformanceStatistic(); | |||
for (const auto &node_ptr : compute_graph_->GetNodes(direct_node_flag)) { | |||
GE_CHECK_NOTNULL(node_ptr); | |||
auto op_desc = node_ptr->GetOpDesc(); | |||
GE_CHECK_NOTNULL(op_desc); | |||
@@ -73,9 +66,7 @@ Status EnginePlacer::Run() { | |||
bool use_exist_engine_name = !op_desc->GetOpKernelLibName().empty() || (has_kernel_attr && has_engine_attr); | |||
if (use_exist_engine_name) { | |||
if (op_desc->GetOpEngineName().empty()) { | |||
GELOGI("Op %s set engine_name %s engine_name %s from attrs", | |||
op_desc->GetName().c_str(), | |||
engine_name.c_str(), | |||
GELOGI("Op %s set engine_name %s engine_name %s from attrs", op_desc->GetName().c_str(), engine_name.c_str(), | |||
kernel_name.c_str()); | |||
op_desc->SetOpEngineName(engine_name); | |||
op_desc->SetOpKernelLibName(kernel_name); | |||
@@ -83,7 +74,7 @@ Status EnginePlacer::Run() { | |||
engine_name = op_desc->GetOpEngineName(); | |||
} else { | |||
// Call placer cost model to get the "best" engine for this node | |||
engine_name = ge::GELib::GetInstance()->DNNEngineManagerObj().GetDNNEngineName(node_ptr); | |||
engine_name = GELib::GetInstance()->DNNEngineManagerObj().GetDNNEngineName(node_ptr); | |||
// If can't get op's engine name, keep check support finish and return failed | |||
if (engine_name.empty()) { | |||
is_check_support_success = false; | |||
@@ -94,34 +85,48 @@ Status EnginePlacer::Run() { | |||
continue; | |||
} | |||
} | |||
if (AssignEngineAndLog(node_ptr, engine_name) != SUCCESS) { | |||
GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "[Call][AssignEngineAndLog] FAILED, node:%s", op_desc->GetName().c_str()); | |||
return FAILED; | |||
} | |||
// Record the node assigned atomic_engine name | |||
GELOGD("Assigning DNNEngine %s to node %s, op type %s", engine_name.c_str(), node_ptr->GetName().c_str(), | |||
node_ptr->GetType().c_str()); | |||
node_atomic_engine_map_.insert(std::make_pair(node_ptr, engine_name)); | |||
} | |||
for (auto &it : ge::GELib::GetInstance()->DNNEngineManagerObj().GetCheckSupportCost()) { | |||
for (auto &it : GELib::GetInstance()->DNNEngineManagerObj().GetCheckSupportCost()) { | |||
GEEVENT("The time cost of %s::CheckSupported is [%lu] micro second.", it.first.c_str(), it.second); | |||
} | |||
GELOGD("Engine placer ends."); | |||
return is_check_support_success ? SUCCESS : FAILED; | |||
} | |||
Status EnginePlacer::AssignEngineAndLog(ge::ConstNodePtr node_ptr, const std::string &engine_name) { | |||
if ((node_ptr == nullptr) || (node_ptr->GetOpDesc() == nullptr)) { | |||
REPORT_INNER_ERROR("E19999", "Param node_ptr is nullptr or it's opdesc is nullptr, check invalid."); | |||
GELOGE(FAILED, "[Check][Param] node_ptr is nullptr."); | |||
Status EnginePlacer::AssignCompoundEngine() { | |||
if (GELib::GetInstance()->OpsKernelManagerObj().GetCompoundEngineContains().empty()) { | |||
GELOGI("No compound engine registers, ignore assign compound engine"); | |||
return SUCCESS; | |||
} | |||
std::vector<ComputeGraphPtr> subgraphs; | |||
if (GraphUtils::GetSubgraphs(compute_graph_, subgraphs) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Get subgraphs contained in graph %s failed", compute_graph_->GetName().c_str()); | |||
GELOGE(FAILED, "[Get][Subgraphs] Get subgraphs contained in graph %s failed", compute_graph_->GetName().c_str()); | |||
return FAILED; | |||
} | |||
// private function, promise node_ptr->GetOpDesc() not null | |||
GELOGD("Assigning DNNEngine %s to node %s, op type %s", engine_name.c_str(), node_ptr->GetName().c_str(), | |||
node_ptr->GetOpDesc()->GetType().c_str()); | |||
// Record the node assigned engine name | |||
node_engine_map_.insert(std::make_pair(node_ptr, engine_name)); | |||
for (const auto &subgraph : subgraphs) { | |||
(void)subgraph->DelAttr(ATTR_NAME_COMPOUND_ENGINE_NAME); | |||
} | |||
std::reverse(subgraphs.begin(), subgraphs.end()); | |||
subgraphs.emplace_back(compute_graph_); | |||
for (const auto &subgraph : subgraphs) { | |||
for (const auto &node : subgraph->GetDirectNode()) { | |||
std::string compound_engine_name = GELib::GetInstance()->DNNEngineManagerObj().GetCompoundEngineName(node, 1); | |||
GELOGD("Assign compound engine %s to node %s, op type %s", compound_engine_name.c_str(), | |||
node->GetName().c_str(), node->GetType().c_str()); | |||
node_compound_engine_map_.insert(std::make_pair(node, compound_engine_name)); | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
} // namespace ge | |||
const NodeEngineMap *EnginePlacer::GetNodeEngineMap(bool compound_engine_flag) const { | |||
return compound_engine_flag ? &node_compound_engine_map_ : &node_atomic_engine_map_; | |||
} | |||
} // namespace ge |
@@ -17,7 +17,6 @@ | |||
#ifndef GE_GRAPH_PARTITION_ENGINE_PLACE_H_ | |||
#define GE_GRAPH_PARTITION_ENGINE_PLACE_H_ | |||
#include <string> | |||
#include <unordered_map> | |||
#include "framework/common/ge_inner_error_codes.h" | |||
@@ -37,19 +36,20 @@ class EnginePlacer { | |||
EnginePlacer() = default; | |||
~EnginePlacer() = default; | |||
Status Run(); | |||
Status Run(bool direct_node_flag = true); | |||
Status AssignCompoundEngine(); | |||
// Get the unique node-engine map | |||
const NodeEngineMap *GetNodeEngineMap() const { return &node_engine_map_; } | |||
const NodeEngineMap *GetNodeEngineMap(bool compound_engine_flag) const; | |||
void SetComputeGraph(const ComputeGraphPtr &compute_graph) { compute_graph_ = compute_graph; } | |||
private: | |||
Status AssignEngineAndLog(ConstNodePtr node_ptr, const std::string &engine_name); | |||
Status Check() const; | |||
ComputeGraphPtr compute_graph_; | |||
NodeEngineMap node_engine_map_; | |||
NodeEngineMap node_atomic_engine_map_; | |||
NodeEngineMap node_compound_engine_map_; | |||
}; | |||
} // namespace ge | |||
@@ -23,17 +23,12 @@ | |||
#include <vector> | |||
#include "analyzer/analyzer.h" | |||
#include "common/ge/ge_util.h" | |||
#include "framework/common/op/ge_op_utils.h" | |||
#include "framework/common/types.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/manager/graph_manager_utils.h" | |||
#include "common/ge_call_wrapper.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/utils/op_desc_utils.h" | |||
#include "graph/utils/type_utils.h" | |||
#include "init/gelib.h" | |||
#include "opskernel_manager/ops_kernel_manager.h" | |||
namespace { | |||
const char *const kEngineDefaultData = "ENGINE_DEFAULT_DATA"; | |||
@@ -386,7 +381,8 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr | |||
dst_node_op_desc->GetOpEngineName()), GELOGW("SetStr rearNodeEngineName failed");) | |||
// replace input_desc of end with owner node's desc | |||
int output_index = ge::AnchorUtils::GetIdx(out_anchor); | |||
bool is_need_update_desc = (output_index >= 0) && (graph_info_.mode_ == kPartitioning); | |||
bool is_need_update_desc = (output_index >= 0) && ((graph_info_.mode_ == kAtomicEnginePartitioning) || | |||
(graph_info_.mode_ == kCompoundEnginePartitioning)); | |||
if (is_need_update_desc) { | |||
if (UpdateEndOpDesc(src_node, output_index, end_op_desc) != SUCCESS) { | |||
GELOGE(GRAPH_PARAM_INVALID, "[Update][EndOpDesc] failed, input index:%d, end_op_desc:%s", | |||
@@ -464,7 +460,8 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr | |||
graph_info_.num_of_pld_end_++; | |||
// replace output_desc of pld with input node's output desc | |||
int input_index = ge::AnchorUtils::GetIdx(peer_in_anchor); | |||
is_need_update_desc = (input_index >= 0) && (graph_info_.mode_ == kPartitioning); | |||
is_need_update_desc = (input_index >= 0) && ((graph_info_.mode_ == kAtomicEnginePartitioning) || | |||
(graph_info_.mode_ == kCompoundEnginePartitioning)); | |||
if (is_need_update_desc) { | |||
if (UpdatePldOpDesc(dst_node, input_index, pld_op_desc) != SUCCESS) { | |||
GELOGE(GRAPH_PARAM_INVALID, "[Update][PldOpDesc] failed, output index:%d, pld_op_desc:%s", | |||
@@ -629,18 +626,8 @@ bool ge::GraphPartitioner::HasNoInput(ge::NodePtr node) { | |||
Status ge::GraphPartitioner::Initialize(ge::ComputeGraphPtr compute_graph) { | |||
GELOGI("Initialize starts."); | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if (instance_ptr == nullptr || compute_graph == nullptr) { | |||
REPORT_INNER_ERROR("E19999", "compute_graph or instance_ptr of GELib is nullptr, check invalid."); | |||
GELOGE(GE_GRAPH_NOT_INIT, "[Check][Param] compute_graph or instance_ptr of GELib is nullptr."); | |||
return FAILED; | |||
} | |||
graph_info_.engine_placer_.SetComputeGraph(compute_graph); | |||
if (graph_info_.engine_placer_.Run() != SUCCESS) { | |||
GELOGE(FAILED, "[Call][Run] Engine placer run failed, graph:%s.", compute_graph->GetName().c_str()); | |||
return FAILED; | |||
} | |||
const NodeEngineMap *node_engine_map = graph_info_.engine_placer_.GetNodeEngineMap(); | |||
GE_CHECK_NOTNULL(compute_graph); | |||
const NodeEngineMap *node_engine_map = GetNodeEngineMap(); | |||
size_t temp_index = 0; | |||
// travese nodes by topo order one by one | |||
for (const auto &node : compute_graph->GetDirectNode()) { | |||
@@ -999,6 +986,25 @@ bool ge::GraphPartitioner::HasSecondPath(size_t src, size_t dst, size_t upper_bo | |||
} | |||
Status ge::GraphPartitioner::Partition(ge::ComputeGraphPtr compute_graph, Mode mode) { | |||
if (compute_graph->TopologicalSorting() != SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "TopologicalSorting for graph:%s failed", | |||
compute_graph->GetName().c_str()); | |||
GELOGE(GE_GRAPH_TOPO_SORT_FAILED, "[Call][TopologicalSorting] for subGraph:%s failed", | |||
compute_graph->GetName().c_str()); | |||
return FAILED; | |||
} | |||
graph_info_.engine_placer_.SetComputeGraph(compute_graph); | |||
if (graph_info_.engine_placer_.Run(false) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][Run] Engine placer run failed, graph:%s.", compute_graph->GetName().c_str()); | |||
return FAILED; | |||
} | |||
if (mode == GraphPartitioner::kCompoundEnginePartitioning) { | |||
if (graph_info_.engine_placer_.AssignCompoundEngine() != SUCCESS) { | |||
GELOGE(FAILED, "[Partition][SubGraph] Assign compound engine for graph %s failed", | |||
compute_graph->GetName().c_str()); | |||
return FAILED; | |||
} | |||
} | |||
ClearAllPartitionData(); | |||
auto real_ret = SUCCESS; | |||
auto ret = PartitionSubGraph(compute_graph, mode); | |||
@@ -1043,14 +1049,6 @@ Status ge::GraphPartitioner::PartitionSubGraph(ge::ComputeGraphPtr compute_graph | |||
return FAILED; | |||
} | |||
GELOGI("Graph Partition starts, graph nodes size is %zu", compute_graph->GetDirectNodesSize()); | |||
Status ret = compute_graph->TopologicalSorting(); | |||
if (ret != SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "TopologicalSorting for graph:%s failed", | |||
compute_graph->GetName().c_str()); | |||
GELOGE(GE_GRAPH_TOPO_SORT_FAILED, "[Call][TopologicalSorting] for subGraph:%s failed", | |||
compute_graph->GetName().c_str()); | |||
return FAILED; | |||
} | |||
GE_TIMESTAMP_START(PartitionSubGraphInitialize); | |||
if (Initialize(compute_graph) != SUCCESS) { | |||
GELOGE(GE_GRAPH_INIT_FAILED, "[Call][Initialize] for graph:%s failed", compute_graph->GetName().c_str()); | |||
@@ -1234,4 +1232,8 @@ void ge::GraphPartitioner::ClearAllPartitionData() { | |||
GELOGD("Clear all partition data success."); | |||
return; | |||
} | |||
const NodeEngineMap *GraphPartitioner::GetNodeEngineMap() const { | |||
return graph_info_.engine_placer_.GetNodeEngineMap(graph_info_.mode_ == kCompoundEnginePartitioning); | |||
} | |||
} // namespace ge |
@@ -56,7 +56,12 @@ class GraphPartitioner { | |||
/// Partition() can only be called in Partition mode. | |||
/// MergeAfterSubGraphOptimization() can only be called in Merge mode. | |||
/// After Partition(), change to Merge mode. After MergeAfterSubGraphOptimization(), change to Partition mode | |||
enum Mode { kPartitioning, kSecondPartitioning, kMerging }; | |||
enum Mode { | |||
kAtomicEnginePartitioning, | |||
kCompoundEnginePartitioning, | |||
kSecondPartitioning, | |||
kMerging | |||
}; | |||
GraphPartitioner() : partition_times_(0){}; | |||
~GraphPartitioner() = default; | |||
@@ -136,6 +141,8 @@ class GraphPartitioner { | |||
void ClearAllPartitionData(); | |||
void SetMergedGraphId(ComputeGraphPtr &output_merged_compute_graph); | |||
const NodeEngineMap *GetNodeEngineMap() const; | |||
struct GraphPartitionInfo { | |||
EnginePlacer engine_placer_; | |||
PartitionMap partitions_; // sub-graphs after partition <sub-graph-id, ComputeGraphPtr> | |||
@@ -165,12 +172,12 @@ class GraphPartitioner { | |||
pld_2_end_.clear(); | |||
end_2_pld_.clear(); | |||
if (mode_ == kMerging) { | |||
mode_ = kPartitioning; | |||
mode_ = kAtomicEnginePartitioning; | |||
} else { | |||
mode_ = mode; | |||
} | |||
} | |||
GraphPartitionInfo() : num_of_pld_end_(0), input_size_(0), output_size_(0), mode_(kPartitioning) {} | |||
GraphPartitionInfo() : num_of_pld_end_(0), input_size_(0), output_size_(0), mode_(kAtomicEnginePartitioning) {} | |||
~GraphPartitionInfo() = default; | |||
}; | |||
std::unordered_map<ComputeGraphPtr, GraphPartitionInfo> graph_2_graph_partition_info_; | |||
@@ -178,8 +185,10 @@ class GraphPartitioner { | |||
Graph2InputNodesSubGraphInfo graph_2_input_subgraph_; | |||
GraphPartitionInfo graph_info_; | |||
uint32_t partition_times_; // times of call partition | |||
std::map<Mode, std::string> mode_2_str_ = {{kPartitioning, "Partitioning"}, | |||
{kSecondPartitioning, "SecondPartitioning"}, {kMerging, "Merging"}}; | |||
std::map<Mode, std::string> mode_2_str_ = {{ kAtomicEnginePartitioning, "AtomicEnginePartitioning" }, | |||
{ kCompoundEnginePartitioning, "CompoundEnginePartitioning" }, | |||
{ kSecondPartitioning, "SecondPartitioning" }, | |||
{ kMerging, "Merging" }}; | |||
friend class GraphManager; | |||
}; | |||
} // namespace ge | |||
@@ -93,15 +93,15 @@ Status StagePartitioner::SplitStageLevel() { | |||
auto node = nodes.top(); | |||
nodes.pop(); | |||
GE_CHECK_NOTNULL(node->GetOpDesc()); | |||
uint32_t tmp_level = cur_stage_level; | |||
(void)AttrUtils::GetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, tmp_level); | |||
if (tmp_level != cur_stage_level) { | |||
continue; | |||
} | |||
for (const auto &in_node : node->GetInAllNodes()) { | |||
if (visited_stage_nodes.count(in_node) != 0) { | |||
continue; | |||
} | |||
uint32_t tmp_level = cur_stage_level; | |||
(void)AttrUtils::GetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, tmp_level); | |||
if (tmp_level != cur_stage_level) { | |||
continue; | |||
} | |||
if (!AttrUtils::SetInt(in_node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) { | |||
REPORT_CALL_ERROR("E19999", "Set Attr %s on node %s failed.", | |||
ATTR_STAGE_LEVEL.c_str(), in_node->GetName().c_str()); | |||
@@ -128,315 +128,27 @@ Status StagePartitioner::SplitStageLevel() { | |||
Status StagePartitioner::StagePartition() { | |||
for (const auto &stage : stage_nodes_) { | |||
StageInfo stage_info(stage.first); | |||
FindStageIO(stage.second, stage_info); | |||
std::string subgraph_name = "Subgraph_Level_" + std::to_string(stage.first); | |||
NodePtr graph_node = BuildSubgraphNode(subgraph_name, stage_info); | |||
if (graph_node == nullptr) { | |||
GELOGE(FAILED, "[Build][SubgraphNode] for stage %u failed, graph name:%s.", stage.first, subgraph_name.c_str()); | |||
const std::string &subgraph_name = "Subgraph_Level_" + std::to_string(stage.first); | |||
const auto &stage_subgraph = GraphUtils::BuildSubgraphWithNodes(root_graph_, stage.second, subgraph_name); | |||
if (stage_subgraph == nullptr) { | |||
REPORT_CALL_ERROR("E19999", "Build subgraph %s failed.", subgraph_name.c_str()); | |||
GELOGE(FAILED, "[Build][Subgraph] %s failed.", subgraph_name.c_str()); | |||
return FAILED; | |||
} | |||
ComputeGraphPtr subgraph = BuildStageGraph(graph_node, stage_info); | |||
if (subgraph == nullptr) { | |||
GELOGE(FAILED, "[Build][StageGraph] %s for stage %u failed.", graph_node->GetName().c_str(), stage.first); | |||
if (!AttrUtils::SetInt(stage_subgraph, ATTR_STAGE_LEVEL, stage.first)) { | |||
REPORT_CALL_ERROR("E19999", "Set attr %s on graph %s failed.", ATTR_STAGE_LEVEL.c_str(), | |||
stage_subgraph->GetName().c_str()); | |||
GELOGE(FAILED, "[Set][Attr] %s on graph %s failed.", ATTR_STAGE_LEVEL.c_str(), stage_subgraph->GetName().c_str()); | |||
return FAILED; | |||
} | |||
if (root_graph_->AddSubgraph(subgraph) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "add subgraph:%s in root graph:%s of stage %u failed.", | |||
subgraph->GetName().c_str(), root_graph_->GetName().c_str(), stage.first); | |||
GELOGE(FAILED, "[Add][SubGraph] %s in root graph:%s of stage %u failed.", | |||
subgraph->GetName().c_str(), root_graph_->GetName().c_str(), stage.first); | |||
const auto &parent_node = stage_subgraph->GetParentNode(); | |||
GE_CHECK_NOTNULL(parent_node); | |||
if (!AttrUtils::SetInt(parent_node->GetOpDesc(), ATTR_STAGE_LEVEL, stage.first)) { | |||
REPORT_CALL_ERROR("E19999", "Set attr %s on node %s failed", ATTR_STAGE_LEVEL.c_str(), | |||
parent_node->GetName().c_str()); | |||
GELOGE(FAILED, "[Set][Attr] %s on node %s failed", ATTR_STAGE_LEVEL.c_str(), parent_node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
if ((RelinkDataEdges(graph_node, stage_info) != SUCCESS) || | |||
(RelinkCtrlEdges(graph_node, stage_info) != SUCCESS)) { | |||
GELOGE(FAILED, "[ReLink][Edges] for stage %u failed, graph_node:%s.", stage.first, graph_node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
for (const auto &stage_node : stage.second) { | |||
if (GraphUtils::RemoveNodeWithoutRelink(root_graph_, stage_node) != GRAPH_SUCCESS) { | |||
GELOGW("Remove node %s failed.", stage_node->GetName().c_str()); | |||
} | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
void StagePartitioner::FindStageIO(const std::unordered_set<NodePtr> &stage_nodes, StageInfo &stage_info) { | |||
for (const auto &node : stage_nodes) { | |||
// stage nodes | |||
stage_info.stage_nodes.emplace(node); | |||
// in data nodes | |||
for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { | |||
OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
if (peer_out_anchor == nullptr) { | |||
continue; | |||
} | |||
if (stage_nodes.count(peer_out_anchor->GetOwnerNode()) == 0) { | |||
stage_info.data_inputs.emplace_back(std::make_pair(peer_out_anchor, in_data_anchor)); | |||
} else { | |||
stage_info.inner_data_edges.emplace_back(std::make_pair(peer_out_anchor, in_data_anchor)); | |||
} | |||
} | |||
// out data nodes | |||
std::list<InDataAnchorPtr> peer_data_anchors; | |||
for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) { | |||
peer_data_anchors.clear(); | |||
for (const auto &peer_in_anchor : out_data_anchor->GetPeerInDataAnchors()) { | |||
if (stage_nodes.count(peer_in_anchor->GetOwnerNode()) == 0) { | |||
peer_data_anchors.emplace_back(peer_in_anchor); | |||
} | |||
} | |||
if (!peer_data_anchors.empty()) { | |||
stage_info.data_outputs.emplace_back(std::make_pair(out_data_anchor, peer_data_anchors)); | |||
} | |||
} | |||
// in ctrl nodes | |||
for (const auto &in_ctrl_node : node->GetInControlNodes()) { | |||
if (stage_nodes.count(in_ctrl_node) == 0) { | |||
stage_info.ctrl_inputs.emplace_back(in_ctrl_node->GetOutControlAnchor(), node->GetInControlAnchor()); | |||
} else { | |||
stage_info.inner_ctrl_edges.emplace_back(std::make_pair(in_ctrl_node->GetOutControlAnchor(), | |||
node->GetInControlAnchor())); | |||
} | |||
} | |||
// out ctrl nodes | |||
for (const auto &out_ctrl_node : node->GetOutControlNodes()) { | |||
if (stage_nodes.count(out_ctrl_node) == 0) { | |||
stage_info.ctrl_outputs.emplace_back(node->GetOutControlAnchor(), out_ctrl_node->GetInControlAnchor()); | |||
} | |||
} | |||
} | |||
} | |||
NodePtr StagePartitioner::BuildSubgraphNode(const std::string &graph_name, const StageInfo &stage_info) { | |||
OpDescBuilder op_desc_builder(graph_name, PARTITIONEDCALL); | |||
size_t input_num = stage_info.data_inputs.size(); | |||
for (size_t i = 0; i < input_num; i++) { | |||
auto input_desc = stage_info.data_inputs[i].second->GetOwnerNode()->GetOpDesc(); | |||
if (input_desc == nullptr) { | |||
GELOGE(PARAM_INVALID, "[Check][Param] op_desc is null, node:%s", | |||
stage_info.data_inputs[i].second->GetOwnerNode()->GetName().c_str()); | |||
return nullptr; | |||
} | |||
op_desc_builder.AddInput("args" + std::to_string(i), | |||
input_desc->GetInputDesc(stage_info.data_inputs[i].second->GetIdx())); | |||
} | |||
size_t output_num = stage_info.data_outputs.size(); | |||
for (size_t i = 0; i < output_num; i++) { | |||
auto output_desc = stage_info.data_outputs[i].first->GetOwnerNode()->GetOpDesc(); | |||
if (output_desc == nullptr) { | |||
GELOGE(PARAM_INVALID, "[Check][Param] op_desc is null, node:%s", | |||
stage_info.data_outputs[i].first->GetOwnerNode()->GetName().c_str()); | |||
return nullptr; | |||
} | |||
op_desc_builder.AddOutput("output" + std::to_string(i), | |||
output_desc->GetOutputDesc(stage_info.data_outputs[i].first->GetIdx())); | |||
} | |||
OpDescPtr op_desc = op_desc_builder.Build(); | |||
if (op_desc == nullptr) { | |||
GELOGE(FAILED, "[Create][OpDesc] for subgraph node failed, name:%s.", graph_name.c_str()); | |||
return nullptr; | |||
} | |||
op_desc->AddSubgraphName("f"); | |||
op_desc->SetSubgraphInstanceName(0, graph_name); | |||
if (!AttrUtils::SetInt(op_desc, ATTR_STAGE_LEVEL, stage_info.stage_level)) { | |||
REPORT_CALL_ERROR("E19999", "set attr %s on node %s failed", ATTR_STAGE_LEVEL.c_str(), op_desc->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Set][Attr] %s on node %s failed", ATTR_STAGE_LEVEL.c_str(), op_desc->GetName().c_str()); | |||
return nullptr; | |||
} | |||
NodePtr subgraph_node = root_graph_->AddNode(op_desc); | |||
if (subgraph_node == nullptr) { | |||
REPORT_CALL_ERROR("E19999", "add node:%s in graph:%s failed.", | |||
op_desc->GetName().c_str(), root_graph_->GetName().c_str()); | |||
GELOGE(FAILED, "[Add][Node] %s in graph:%s failed.", op_desc->GetName().c_str(), root_graph_->GetName().c_str()); | |||
return nullptr; | |||
} | |||
if (subgraph_node->SetOwnerComputeGraph(root_graph_) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "SetOwnerComputeGraph for node %s failed, grpah:%s.", | |||
subgraph_node->GetName().c_str(), root_graph_->GetName().c_str()); | |||
GELOGE(FAILED, "[Set][OwnerGraph] for node %s failed, grpah:%s.", | |||
subgraph_node->GetName().c_str(), root_graph_->GetName().c_str()); | |||
return nullptr; | |||
} | |||
return subgraph_node; | |||
} | |||
ComputeGraphPtr StagePartitioner::BuildStageGraph(const NodePtr &subgraph_node, const StageInfo &stage_info) { | |||
CompleteGraphBuilder graph_builder(subgraph_node->GetName(), false); | |||
// Add parent node | |||
graph_builder.SetParentNode(subgraph_node); | |||
// Add node | |||
for (const auto &node : stage_info.stage_nodes) { | |||
graph_builder.AddNode(AttrUtils::CopyOpDesc(node->GetOpDesc())); | |||
} | |||
// Set Input | |||
size_t data_input_num = stage_info.data_inputs.size(); | |||
for (size_t i = 0; i < data_input_num; i++) { | |||
graph_builder.SetInput(i, { stage_info.data_inputs[i].second->GetOwnerNode()->GetName() }, | |||
{ static_cast<uint32_t>(stage_info.data_inputs[i].second->GetIdx()) }); | |||
} | |||
// Add Outputs | |||
size_t data_output_num = stage_info.data_outputs.size(); | |||
for (uint32_t i = 0; i < data_output_num; i++) { | |||
graph_builder.AddOutput(stage_info.data_outputs[i].first->GetOwnerNode()->GetName(), | |||
stage_info.data_outputs[i].first->GetIdx()); | |||
} | |||
// Add Data Edges | |||
for (const auto &data_edge : stage_info.inner_data_edges) { | |||
graph_builder.AddDataLink(data_edge.first->GetOwnerNode()->GetName(), data_edge.first->GetIdx(), | |||
data_edge.second->GetOwnerNode()->GetName(), data_edge.second->GetIdx()); | |||
} | |||
// Add Ctrl Edges | |||
for (const auto &ctrl_edge : stage_info.inner_ctrl_edges) { | |||
graph_builder.AddControlLink(ctrl_edge.first->GetOwnerNode()->GetName(), | |||
ctrl_edge.second->GetOwnerNode()->GetName()); | |||
} | |||
// Add Input-Mapping | |||
std::map<uint32_t, uint32_t> input_mapping; | |||
for (size_t i = 0; i < data_input_num; i++) { | |||
input_mapping[i] = i; | |||
} | |||
graph_builder.SetInputMapping(input_mapping); | |||
// Add outputMapping | |||
std::map<uint32_t, uint32_t> output_mapping; | |||
for (size_t i = 0; i < data_output_num; i++) { | |||
output_mapping[i] = i; | |||
} | |||
graph_builder.SetOutputMapping(output_mapping); | |||
graphStatus error_code = GRAPH_SUCCESS; | |||
std::string error_msg; | |||
ComputeGraphPtr subgraph = graph_builder.Build(error_code, error_msg); | |||
if (subgraph == nullptr) { | |||
GELOGE(error_code, "[Build][Subgraph] %s failed:%s.", subgraph_node->GetName().c_str(), error_msg.c_str()); | |||
return nullptr; | |||
} | |||
if (!AttrUtils::SetInt(subgraph, ATTR_STAGE_LEVEL, stage_info.stage_level)) { | |||
REPORT_CALL_ERROR("E19999", "set attr %s on graph %s failed.", | |||
ATTR_STAGE_LEVEL.c_str(), subgraph->GetName().c_str()); | |||
GELOGE(FAILED, "[Set][Attr] %s on graph %s failed.", ATTR_STAGE_LEVEL.c_str(), subgraph->GetName().c_str()); | |||
return nullptr; | |||
} | |||
return subgraph; | |||
} | |||
Status StagePartitioner::RelinkDataEdges(const NodePtr &subgraph_node, const StageInfo &stage_info) {
  // Re-route data edges that cross the stage boundary so external peers connect
  // to the new subgraph node instead of the original inner nodes.
  // data_inputs[i] / data_outputs[i] correspond by position to subgraph_node's
  // i-th in/out data anchor.
  // in data nodes
  for (size_t i = 0; i < stage_info.data_inputs.size(); i++) {
    if (stage_info.data_inputs[i].first->Unlink(stage_info.data_inputs[i].second) != GRAPH_SUCCESS) {
      REPORT_CALL_ERROR("E19999", "remove data edge from %s:%d to %s:%d failed",
                        stage_info.data_inputs[i].first->GetOwnerNode()->GetName().c_str(),
                        stage_info.data_inputs[i].first->GetIdx(),
                        stage_info.data_inputs[i].second->GetOwnerNode()->GetName().c_str(),
                        stage_info.data_inputs[i].second->GetIdx());
      GELOGE(INTERNAL_ERROR, "[Remove][DataEdge] %s:%d->%s:%d failed.",
             stage_info.data_inputs[i].first->GetOwnerNode()->GetName().c_str(),
             stage_info.data_inputs[i].first->GetIdx(),
             stage_info.data_inputs[i].second->GetOwnerNode()->GetName().c_str(),
             stage_info.data_inputs[i].second->GetIdx());
      return INTERNAL_ERROR;
    }
    // Null-check the in-anchor before linking, consistent with the
    // GE_CHECK_NOTNULL on the out-anchor in the output loop below.
    const auto &in_data_anchor = subgraph_node->GetInDataAnchor(i);
    GE_CHECK_NOTNULL(in_data_anchor);
    if (stage_info.data_inputs[i].first->LinkTo(in_data_anchor) != GRAPH_SUCCESS) {
      REPORT_CALL_ERROR("E19999", "add data edge from %s:%d to %s:%zu failed.",
                        stage_info.data_inputs[i].first->GetOwnerNode()->GetName().c_str(),
                        stage_info.data_inputs[i].first->GetIdx(),
                        subgraph_node->GetName().c_str(), i);
      GELOGE(INTERNAL_ERROR, "[Add][DataEdge] %s:%d->%s:%zu failed.",
             stage_info.data_inputs[i].first->GetOwnerNode()->GetName().c_str(),
             stage_info.data_inputs[i].first->GetIdx(),
             subgraph_node->GetName().c_str(), i);
      return INTERNAL_ERROR;
    }
  }
  // out data nodes
  for (size_t i = 0; i < stage_info.data_outputs.size(); i++) {
    const auto &out_data_anchor = subgraph_node->GetOutDataAnchor(i);
    GE_CHECK_NOTNULL(out_data_anchor);
    // One stage output may feed several external consumers; relink each peer.
    for (const auto &peer_in_anchor : stage_info.data_outputs[i].second) {
      if (stage_info.data_outputs[i].first->Unlink(peer_in_anchor) != GRAPH_SUCCESS) {
        REPORT_CALL_ERROR("E19999", "Remove data edge from %s:%d to %s:%d failed.",
                          stage_info.data_outputs[i].first->GetOwnerNode()->GetName().c_str(),
                          stage_info.data_outputs[i].first->GetIdx(),
                          peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx());
        GELOGE(INTERNAL_ERROR, "[Remove][DataEdge] %s:%d->%s:%d failed.",
               stage_info.data_outputs[i].first->GetOwnerNode()->GetName().c_str(),
               stage_info.data_outputs[i].first->GetIdx(),
               peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx());
        return INTERNAL_ERROR;
      }
      if (out_data_anchor->LinkTo(peer_in_anchor) != GRAPH_SUCCESS) {
        REPORT_CALL_ERROR("E19999", "Add data edge from %s:%zu to %s:%d failed.", subgraph_node->GetName().c_str(), i,
                          peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx());
        GELOGE(INTERNAL_ERROR, "[Add][DataEdge] %s:%zu->%s:%d failed.", subgraph_node->GetName().c_str(), i,
               peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx());
        return INTERNAL_ERROR;
      }
    }
  }
  return SUCCESS;
}
Status StagePartitioner::RelinkCtrlEdges(const NodePtr &subgraph_node, const StageInfo &stage_info) { | |||
// Re-route control edges crossing the stage boundary: incoming ctrl edges are
// moved onto subgraph_node's in-control anchor, outgoing ones onto its
// out-control anchor. Returns INTERNAL_ERROR on any unlink/link failure.
// in ctrl nodes | |||
for (const auto &ctrl_input : stage_info.ctrl_inputs) { | |||
if (ctrl_input.first->Unlink(ctrl_input.second) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Remove ctrl edge %s->%s failed.", | |||
ctrl_input.first->GetOwnerNode()->GetName().c_str(), | |||
ctrl_input.second->GetOwnerNode()->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Remove][CtrlEdge] %s->%s failed.", | |||
ctrl_input.first->GetOwnerNode()->GetName().c_str(), ctrl_input.second->GetOwnerNode()->GetName().c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
// Several inner nodes may share one external ctrl source; the IsLinkedWith
// guard avoids adding duplicate edges to the subgraph node.
if (!ctrl_input.first->IsLinkedWith(subgraph_node->GetInControlAnchor())) { | |||
if (ctrl_input.first->LinkTo(subgraph_node->GetInControlAnchor()) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Add ctrl edge %s->%s failed.", | |||
ctrl_input.first->GetOwnerNode()->GetName().c_str(), subgraph_node->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Add][CtrlEdge] %s->%s failed.", | |||
ctrl_input.first->GetOwnerNode()->GetName().c_str(), subgraph_node->GetName().c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
} | |||
} | |||
// out ctrl nodes | |||
for (const auto &ctrl_output : stage_info.ctrl_outputs) { | |||
if (ctrl_output.first->Unlink(ctrl_output.second) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Remove ctrl edge %s->%s failed.", | |||
ctrl_output.first->GetOwnerNode()->GetName().c_str(), | |||
ctrl_output.second->GetOwnerNode()->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Remove][CtrlEdge] %s->%s failed.", | |||
ctrl_output.first->GetOwnerNode()->GetName().c_str(), | |||
ctrl_output.second->GetOwnerNode()->GetName().c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
// Same de-duplication as above, for edges leaving the stage.
if (!subgraph_node->GetOutControlAnchor()->IsLinkedWith(ctrl_output.second)) { | |||
if (subgraph_node->GetOutControlAnchor()->LinkTo(ctrl_output.second) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Add ctrl edge %s->%s failed.", | |||
subgraph_node->GetName().c_str(), ctrl_output.second->GetOwnerNode()->GetName().c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Add][CtrlEdge] %s->%s failed.", | |||
subgraph_node->GetName().c_str(), ctrl_output.second->GetOwnerNode()->GetName().c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
} | |||
} | |||
return SUCCESS; | |||
@@ -17,26 +17,10 @@ | |||
#ifndef GE_GRAPH_PARTITION_STAGE_PARTITION_H_ | |||
#define GE_GRAPH_PARTITION_STAGE_PARTITION_H_ | |||
#include <map> | |||
#include <unordered_set> | |||
#include <list> | |||
#include <utility> | |||
#include "framework/common/ge_inner_error_codes.h" | |||
#include "graph/compute_graph.h" | |||
namespace ge { | |||
struct StageInfo { | |||
// Everything collected about one pipeline stage while partitioning the graph.
explicit StageInfo(uint32_t level) : stage_level(level) {} | |||
// Stage level this group of nodes belongs to.
uint32_t stage_level; | |||
// Nodes assigned to this stage.
std::unordered_set<NodePtr> stage_nodes; | |||
// Data edges entering the stage: (external src out-anchor, inner dst in-anchor).
std::vector<std::pair<OutDataAnchorPtr, InDataAnchorPtr>> data_inputs; | |||
// Data edges leaving the stage: inner src out-anchor and all its external peers.
std::vector<std::pair<OutDataAnchorPtr, std::list<InDataAnchorPtr>>> data_outputs; | |||
// Control edges entering / leaving the stage.
std::list<std::pair<OutControlAnchorPtr, InControlAnchorPtr>> ctrl_inputs; | |||
std::list<std::pair<OutControlAnchorPtr, InControlAnchorPtr>> ctrl_outputs; | |||
// Edges fully inside the stage (both endpoints in stage_nodes).
std::list<std::pair<OutDataAnchorPtr, InDataAnchorPtr>> inner_data_edges; | |||
std::list<std::pair<OutControlAnchorPtr, InControlAnchorPtr>> inner_ctrl_edges; | |||
}; | |||
class StagePartitioner { | |||
public: | |||
explicit StagePartitioner(ComputeGraphPtr graph) : root_graph_(std::move(graph)) {} | |||
@@ -49,18 +33,8 @@ class StagePartitioner { | |||
Status StagePartition(); | |||
static void FindStageIO(const std::unordered_set<NodePtr> &stage_nodes, StageInfo &stage_info); | |||
NodePtr BuildSubgraphNode(const std::string &graph_name, const StageInfo &stage_info); | |||
static ComputeGraphPtr BuildStageGraph(const NodePtr &subgraph_node, const StageInfo &stage_info); | |||
static Status RelinkDataEdges(const NodePtr &subgraph_node, const StageInfo &stage_info); | |||
static Status RelinkCtrlEdges(const NodePtr &subgraph_node, const StageInfo &stage_info); | |||
ComputeGraphPtr root_graph_; | |||
std::map<uint32_t, std::unordered_set<NodePtr>> stage_nodes_; | |||
std::map<uint32_t, std::set<NodePtr>> stage_nodes_; | |||
}; | |||
} // namespace ge | |||
@@ -20,41 +20,36 @@ | |||
#include <vector> | |||
#include "init/gelib.h" | |||
#include "graph/node.h" | |||
namespace ge { | |||
Status EndOfSequenceAddControlPass::Run(ComputeGraphPtr graph) { | |||
if (graph == nullptr) { | |||
REPORT_INNER_ERROR("E19999", "Param graph is nullptr, check invalid"); | |||
GELOGE(PARAM_INVALID, "[Check][Param] param [graph] must not be null."); | |||
return PARAM_INVALID; | |||
} | |||
if (graph->GetParentGraph() != nullptr) { | |||
return SUCCESS; | |||
} | |||
NodePtr end_of_sequence = GetEndOfSequence(graph); | |||
const auto &end_of_sequence = graph->FindFirstNodeMatchType(ENDOFSEQUENCE); | |||
if (end_of_sequence == nullptr) { | |||
return SUCCESS; | |||
} | |||
GELOGI("EndOfSequenceAddControlPass begin."); | |||
std::shared_ptr<GELib> instance_ptr = GELib::GetInstance(); | |||
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
GELOGE(FAILED, "GELib not initialized"); | |||
return FAILED; | |||
} | |||
std::vector<NodePtr> target_nodes; | |||
for (NodePtr &node : graph->GetDirectNode()) { | |||
if (node == nullptr) { | |||
GELOGW("node is nullptr."); | |||
continue; | |||
} | |||
string stream_label; | |||
(void)AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label); | |||
if (!stream_label.empty() || IsDataLikeNode(node)) { | |||
// op_desc of node should not be null | |||
if (node->GetOpDesc()->HasAttr(ATTR_NAME_STREAM_LABEL) || | |||
instance_ptr->DNNEngineManagerObj().IsStreamAssignSkip(node)) { | |||
continue; | |||
} | |||
// Save the nodes whose pre-nodes are all data-like node | |||
auto in_data_nodes = node->GetInDataNodes(); | |||
bool flag = false; | |||
for (auto in_node : in_data_nodes) { | |||
if (!IsDataLikeNode(in_node)) { | |||
for (const auto &in_node : node->GetInDataNodes()) { | |||
if (!instance_ptr->DNNEngineManagerObj().IsStreamAssignSkip(in_node)) { | |||
flag = true; | |||
break; | |||
} | |||
@@ -64,83 +59,20 @@ Status EndOfSequenceAddControlPass::Run(ComputeGraphPtr graph) { | |||
} | |||
target_nodes.push_back(node); | |||
} | |||
// Insert control edge | |||
Status status = AddControlEdge(end_of_sequence, target_nodes); | |||
if (status != SUCCESS) { | |||
GELOGE(FAILED, "[Add][ControlEdge] Graph add EndOfSequence op:%s out ctrl edge failed.", | |||
end_of_sequence->GetName().c_str()); | |||
return FAILED; | |||
} | |||
GELOGI("EndOfSequenceAddControlPass end."); | |||
return SUCCESS; | |||
} | |||
Status EndOfSequenceAddControlPass::AddControlEdge(NodePtr &end_of_sequence, std::vector<NodePtr> &target_nodes) { | |||
auto out_ctrl_anchor = end_of_sequence->GetOutControlAnchor(); | |||
for (NodePtr &node : target_nodes) { | |||
auto in_ctrl_anchor = node->GetInControlAnchor(); | |||
if (in_ctrl_anchor == nullptr) { | |||
continue; | |||
} | |||
Status status = GraphUtils::AddEdge(out_ctrl_anchor, in_ctrl_anchor); | |||
if (status != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Add control edge between op:%s(%s) and op:%s(%s) failed", | |||
end_of_sequence->GetName().c_str(), end_of_sequence->GetType().c_str(), | |||
node->GetName().c_str(), node->GetType().c_str()); | |||
GELOGE(FAILED, "[Add][ControlEdge] between op:%s(%s) and op:%s(%s) failed", | |||
end_of_sequence->GetName().c_str(), end_of_sequence->GetType().c_str(), | |||
node->GetName().c_str(), node->GetType().c_str()); | |||
// Insert control edge | |||
for (const auto &node : target_nodes) { | |||
GELOGI("Add ctrl edge between %s and %s", end_of_sequence->GetName().c_str(), node->GetName().c_str()); | |||
if (GraphUtils::AddEdge(end_of_sequence->GetOutControlAnchor(), node->GetInControlAnchor()) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Add ctrl edge between %s and %s failed", end_of_sequence->GetName().c_str(), | |||
node->GetName().c_str()); | |||
GELOGE(FAILED, "[Add][CtrlEdge] between %s and %s failed", end_of_sequence->GetName().c_str(), | |||
node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
GELOGI("Graph add EndOfSequence op out ctrl edge, dst node: %s.", node->GetName().c_str()); | |||
} | |||
return SUCCESS; | |||
} | |||
inline NodePtr EndOfSequenceAddControlPass::GetEndOfSequence(const ComputeGraphPtr &graph) const {
  // Internal helper; the caller guarantees graph is non-null.
  // Returns the first direct node of type ENDOFSEQUENCE, or nullptr if none exists.
  NodePtr found = nullptr;
  for (auto &direct_node : graph->GetDirectNode()) {
    if (direct_node->GetType() != ENDOFSEQUENCE) {
      continue;
    }
    found = direct_node;
    break;
  }
  return found;
}
bool EndOfSequenceAddControlPass::IsDataLikeNode(const NodePtr &node) { | |||
std::shared_ptr<GELib> instance_ptr = GELib::GetInstance(); | |||
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
GELOGW("GELib not initialized"); | |||
return false; | |||
} | |||
OpDescPtr op_desc = node->GetOpDesc(); | |||
if (op_desc == nullptr) { | |||
return false; | |||
} | |||
string engine_name = op_desc->GetOpEngineName(); | |||
if (engine_name.empty()) { | |||
engine_name = instance_ptr->DNNEngineManagerObj().GetDNNEngineName(node); | |||
} | |||
const map<string, SchedulerConf> schedulers = instance_ptr->DNNEngineManagerObj().GetSchedulers(); | |||
// Only one scheduler has been supported by now | |||
for (auto schedulers_iter = schedulers.begin(); schedulers_iter != schedulers.end(); ++schedulers_iter) { | |||
const map<string, EngineConfPtr> cal_engines = schedulers_iter->second.cal_engines; | |||
auto cal_engines_iter = cal_engines.find(engine_name); | |||
if (cal_engines_iter == cal_engines.end()) { | |||
GELOGW("No cal_engines found within engine %s, node name %s", engine_name.c_str(), node->GetName().c_str()); | |||
continue; | |||
} | |||
EngineConfPtr engine_conf_ptr = cal_engines_iter->second; | |||
if (engine_conf_ptr == nullptr) { | |||
GELOGW("engine_conf_ptr within engine %s, node name %s is null", engine_name.c_str(), node->GetName().c_str()); | |||
continue; | |||
} | |||
bool skip_assign_stream = engine_conf_ptr->skip_assign_stream; | |||
if (skip_assign_stream) { | |||
return true; | |||
} | |||
return false; | |||
} | |||
return false; | |||
GELOGI("EndOfSequenceAddControlPass end."); | |||
return SUCCESS; | |||
} | |||
} // namespace ge |
@@ -30,26 +30,6 @@ class EndOfSequenceAddControlPass : public GraphPass { | |||
~EndOfSequenceAddControlPass() override {} | |||
Status Run(ComputeGraphPtr graph) override; | |||
private: | |||
/** | |||
* Get EndOfSequence node in graph, nullptr if not exist. | |||
* @param graph | |||
* @return EndOfSequence node | |||
*/ | |||
inline NodePtr GetEndOfSequence(const ComputeGraphPtr &graph) const; | |||
/** | |||
* Check whether this node is a data-like node. | |||
* @param node | |||
* @return | |||
*/ | |||
bool IsDataLikeNode(const NodePtr &node); | |||
/** | |||
* Check whether this node is a data-like node. | |||
* @param node | |||
* @return | |||
*/ | |||
Status AddControlEdge(NodePtr &end_of_sequence, std::vector<NodePtr> &target_nodes); | |||
}; | |||
} // namespace ge | |||
@@ -23,7 +23,6 @@ | |||
#include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" | |||
#include "common/formats/format_transfers/format_transfer_transpose.h" | |||
#include "common/formats/utils/formats_trans_utils.h" | |||
#include "common/util/error_manager/error_manager.h" | |||
#include "framework/common/helper/model_helper.h" | |||
#include "common/math/math_util.h" | |||
#include "framework/common/op/ge_op_utils.h" | |||
@@ -39,7 +38,6 @@ | |||
#include "graph/passes/addn_pass.h" | |||
#include "graph/passes/aicpu_constant_folding_pass.h" | |||
#include "graph/passes/assert_pass.h" | |||
#include "external/ge/ge_api_types.h" | |||
#include "graph/passes/common_subexpression_elimination_pass.h" | |||
#include "graph/passes/cond_pass.h" | |||
#include "graph/passes/cond_remove_pass.h" | |||
@@ -774,7 +772,12 @@ Status UpdateSubgraphDataOfCase(NodePtr &mbatch_node, DataType &dt_set, int32_t | |||
return SUCCESS; | |||
} | |||
auto subgraphs = NodeUtils::GetAllSubgraphs(*mbatch_node); | |||
std::vector<ComputeGraphPtr> subgraphs; | |||
if (NodeUtils::GetSubgraphs(mbatch_node, subgraphs) != GRAPH_SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Get subgraphs of node %s failed", mbatch_node->GetName().c_str()); | |||
GELOGE(FAILED, "[Check][Param] Get subgraphs of node %s failed", mbatch_node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
for (const auto &subgraph : subgraphs) { | |||
GE_CHECK_NOTNULL(subgraph); | |||
for (auto &sub_node : subgraph->GetDirectNode()) { | |||
@@ -60,7 +60,6 @@ const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE"; | |||
const char *const kForceInfershape = "_force_infershape_when_running"; | |||
const std::set<std::string> kExecutionDependentTypes{ IF, STATELESSIF, CASE, STREAMSWITCH }; | |||
const std::set<std::string> kMergeInputSkipTypes{ STREAMACTIVE, STREAMSWITCH, CONSTANT, CONSTANTOP }; | |||
const std::set<std::string> kStreamActiveTypes{ ENTER, REFENTER, NEXTITERATION, REFNEXTITERATION }; | |||
Status SetOutputNameAttr(ComputeGraph &graph) { | |||
@@ -519,170 +518,6 @@ Status HybridModelBuilder::UpdateAnchorStatus(const NodePtr &node) { | |||
return SUCCESS; | |||
} | |||
Status HybridModelBuilder::DoUnlinkDataAnchors(const OutDataAnchorPtr &out_data_anchor,
                                               const InDataAnchorPtr &in_data_anchor) {
  // Remove the data edge out_data_anchor -> in_data_anchor, logging both endpoints.
  const auto &src_name = out_data_anchor->GetOwnerNode()->GetName();
  const auto &dst_name = in_data_anchor->GetOwnerNode()->GetName();
  const auto src_idx = out_data_anchor->GetIdx();
  const auto dst_idx = in_data_anchor->GetIdx();
  GE_CHK_GRAPH_STATUS_RET(out_data_anchor->Unlink(in_data_anchor),
                          "[Invoke][Unlink] failed to unlink %s:%d from %s:%d",
                          src_name.c_str(), src_idx, dst_name.c_str(), dst_idx);
  GELOGD("Succeeded in unlinking %s:%d from %s:%d", src_name.c_str(), src_idx, dst_name.c_str(), dst_idx);
  return SUCCESS;
}
Status HybridModelBuilder::DoLinkDataAnchors(OutDataAnchorPtr &out_data_anchor, InDataAnchorPtr &in_data_anchor) {
  // Add a data edge out_data_anchor -> in_data_anchor, logging both endpoints.
  const auto &src_name = out_data_anchor->GetOwnerNode()->GetName();
  const auto &dst_name = in_data_anchor->GetOwnerNode()->GetName();
  const auto src_idx = out_data_anchor->GetIdx();
  const auto dst_idx = in_data_anchor->GetIdx();
  GE_CHK_GRAPH_STATUS_RET(out_data_anchor->LinkTo(in_data_anchor), "[Invoke][LinkTo]Failed to link %s:%d to %s:%d",
                          src_name.c_str(), src_idx, dst_name.c_str(), dst_idx);
  GELOGD("Succeeded in linking %s:%d to %s:%d", src_name.c_str(), src_idx, dst_name.c_str(), dst_idx);
  return SUCCESS;
}
Status HybridModelBuilder::MergeInputNodes(ComputeGraph &graph) { | |||
// Fold the subgraph's Data nodes away: rewire each Data node's consumers
// directly to the corresponding input source of the parent (wrapped) node,
// then transfer the parent's incoming control edges to the subgraph's root
// nodes. Statement order is load-bearing; documented only, not restructured.
const auto &wrapped_node = graph.GetParentNode(); | |||
std::set<NodePtr> root_nodes; | |||
for (const auto &node : graph.GetDirectNode()) { | |||
GE_CHECK_NOTNULL(node); | |||
if (node->GetType() != DATA_TYPE) { | |||
// Non-Data node with no data inputs is a root of the subgraph.
if (node->GetInDataNodes().empty()) { | |||
root_nodes.emplace(node); | |||
} | |||
continue; | |||
} | |||
auto data_op_desc = node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(data_op_desc); | |||
// The Data node's parent-index attr identifies which parent input it mirrors.
uint32_t parent_index = 0; | |||
if (!AttrUtils::GetInt(data_op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
GELOGE(FAILED, "[Invoke][GetInt] failed, node:[%s] attr:[%s]", | |||
data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str()); | |||
REPORT_CALL_ERROR("E19999", "GetInt failed, node:[%s] attr:[%s]", | |||
data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str()); | |||
return FAILED; | |||
} | |||
auto wrapped_node_in_anchor = wrapped_node->GetInDataAnchor(parent_index); | |||
GE_CHECK_NOTNULL(wrapped_node_in_anchor); | |||
auto src_out_anchor = wrapped_node_in_anchor->GetPeerOutAnchor(); | |||
// Unconnected parent input: nothing to rewire for this Data node.
if (src_out_anchor == nullptr || src_out_anchor->GetOwnerNode() == nullptr) { | |||
continue; | |||
} | |||
wrapped_node_in_anchor->UnlinkAll(); | |||
// link src to outputs of DataNode | |||
for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { | |||
GE_CHECK_NOTNULL(out_data_anchor); | |||
for (auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { | |||
auto dst_node = peer_in_data_anchor->GetOwnerNode(); | |||
GE_CHECK_NOTNULL(dst_node); | |||
// Consumers fed only by Data nodes also become roots (their real
// inputs now come from outside the subgraph).
const auto in_nodes = dst_node->GetInDataNodes(); | |||
if (std::all_of(in_nodes.begin(), in_nodes.end(), [](const NodePtr &n) { return n->GetType() == DATA; })) { | |||
root_nodes.emplace(dst_node); | |||
} | |||
GE_CHK_STATUS_RET_NOLOG(DoUnlinkDataAnchors(out_data_anchor, peer_in_data_anchor)); | |||
GE_CHK_STATUS_RET_NOLOG(DoLinkDataAnchors(src_out_anchor, peer_in_data_anchor)); | |||
} | |||
} | |||
} | |||
// transfer in control edges to all root nodes | |||
for (auto &root_node : root_nodes) { | |||
auto in_nodes = root_node->GetInAllNodes(); | |||
std::set<NodePtr> in_node_set(in_nodes.begin(), in_nodes.end()); | |||
for (auto &in_control_node : wrapped_node->GetInControlNodes()) { | |||
// Skip edges that would duplicate an existing dependency, and skip
// node types listed in kMergeInputSkipTypes.
if (in_node_set.count(in_control_node) == 0 && kMergeInputSkipTypes.count(root_node->GetType()) == 0) { | |||
GELOGD("[%s] Restore control edge to [%s]", in_control_node->GetName().c_str(), root_node->GetName().c_str()); | |||
GE_CHECK_NOTNULL(in_control_node->GetOutControlAnchor()); | |||
(void) in_control_node->GetOutControlAnchor()->LinkTo(root_node->GetInControlAnchor()); | |||
} | |||
} | |||
} | |||
wrapped_node->GetInControlAnchor()->UnlinkAll(); | |||
return SUCCESS; | |||
} | |||
Status HybridModelBuilder::MergeNetOutputNode(ComputeGraph &graph) { | |||
// Fold the subgraph's NetOutput away: reconnect each producer of a NetOutput
// input directly to the consumers of the corresponding parent-node output,
// then restore control-dependency ordering between the subgraph's producers
// and the parent's downstream nodes.
const auto &parent_node = graph.GetParentNode(); | |||
const NodePtr &net_output_node = graph.FindFirstNodeMatchType(NETOUTPUT); | |||
if (net_output_node == nullptr) { | |||
GELOGD("Graph has no netoutput no need to merge"); | |||
return SUCCESS; | |||
} | |||
const auto &net_output_desc = net_output_node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(net_output_desc); | |||
// Snapshot neighbors BEFORE unlinking the control anchors below.
auto all_in_nodes = net_output_node->GetInAllNodes(); | |||
auto all_out_nodes = parent_node->GetOutAllNodes(); | |||
net_output_node->GetInControlAnchor()->UnlinkAll(); | |||
parent_node->GetOutControlAnchor()->UnlinkAll(); | |||
for (const auto &in_data_anchor : net_output_node->GetAllInDataAnchors()) { | |||
auto src_out_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
GE_CHECK_NOTNULL(src_out_anchor); | |||
GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode()); | |||
GE_CHK_STATUS_RET_NOLOG(DoUnlinkDataAnchors(src_out_anchor, in_data_anchor)); | |||
auto index = in_data_anchor->GetIdx(); | |||
auto input_desc = net_output_desc->MutableInputDesc(index); | |||
if (input_desc == nullptr) { | |||
GELOGE(INTERNAL_ERROR, "[Invoke][MutableInputDesc][%s] Failed to get input desc[%d]", | |||
net_output_desc->GetName().c_str(), index); | |||
REPORT_CALL_ERROR("E19999", "[%s] Failed to get input desc[%d].", net_output_desc->GetName().c_str(), index); | |||
return INTERNAL_ERROR; | |||
} | |||
// Missing parent-index attr is tolerated: that input has no parent mapping.
uint32_t parent_index = 0; | |||
if (!AttrUtils::GetInt(input_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
GELOGW("SubGraph: %s NetOutput input tensor %d, attr %s not found.", | |||
graph.GetName().c_str(), index, ATTR_NAME_PARENT_NODE_INDEX.c_str()); | |||
continue; | |||
} | |||
const OutDataAnchorPtr &parent_out_anchor = parent_node->GetOutDataAnchor(parent_index); | |||
GE_CHECK_NOTNULL(parent_out_anchor); | |||
for (InDataAnchorPtr &dst_in_anchor : parent_out_anchor->GetPeerInDataAnchors()) { | |||
if (dst_in_anchor == nullptr) { | |||
continue; | |||
} | |||
GE_CHECK_NOTNULL(dst_in_anchor->GetOwnerNode()); | |||
GE_CHK_STATUS_RET_NOLOG(DoUnlinkDataAnchors(parent_out_anchor, dst_in_anchor)); | |||
GE_CHK_STATUS_RET_NOLOG(DoLinkDataAnchors(src_out_anchor, dst_in_anchor)); | |||
} | |||
} | |||
// transfer out control edges | |||
std::set<NodePtr> in_node_set(all_in_nodes.begin(), all_in_nodes.end()); | |||
std::set<NodePtr> out_node_set(all_out_nodes.begin(), all_out_nodes.end()); | |||
for (auto &src_node : in_node_set) { | |||
GELOGD("[%s] process in node.", src_node->GetName().c_str()); | |||
auto out_nodes = src_node->GetOutAllNodes(); | |||
std::set<NodePtr> node_set(out_nodes.begin(), out_nodes.end()); | |||
for (auto &dst_node : out_node_set) { | |||
// Only add the ctrl edge if no (data or ctrl) dependency already exists.
if (node_set.count(dst_node) == 0) { | |||
src_node->GetOutControlAnchor()->LinkTo(dst_node->GetInControlAnchor()); | |||
GELOGD("[%s] Restore control edge to [%s]", src_node->GetName().c_str(), dst_node->GetName().c_str()); | |||
} | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeGraphPtr &merged_graph) { | |||
merged_graph = MakeShared<ComputeGraph>("MergedGraph"); | |||
merged_graph->SetGraphUnknownFlag(root_graph->GetGraphUnknownFlag()); | |||
@@ -716,9 +551,21 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeG | |||
} | |||
} | |||
} | |||
GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraph(root_graph, merged_graph, *subgraph), | |||
const auto &filter = [](const ComputeGraphPtr &graph) { | |||
const auto &parent_node = graph->GetParentNode(); | |||
if (parent_node == nullptr || parent_node->GetOpDesc() == nullptr) { | |||
return false; | |||
} | |||
if ((parent_node->GetType() != PARTITIONEDCALL) || | |||
(parent_node->GetOpDesc()->GetSubgraphInstanceNames().size() != 1)) { | |||
return false; | |||
} | |||
return graph->GetGraphUnknownFlag(); | |||
}; | |||
GE_CHK_GRAPH_STATUS_RET(GraphUtils::UnfoldSubgraph(subgraph, filter), | |||
"[Invoke][UnfoldSubgraph][%s] Failed to merge subgraph.", | |||
subgraph->GetName().c_str()); | |||
subgraph->GetName().c_str()) | |||
} | |||
// invoke before adding subgraphs. in case modify node id in known-shaped subgraphs. | |||
@@ -744,56 +591,6 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeG | |||
return SUCCESS; | |||
} | |||
Status HybridModelBuilder::UnfoldSubgraph(ComputeGraphPtr &root_graph, | |||
ComputeGraphPtr &parent_graph, | |||
ComputeGraph &sub_graph) { | |||
// Flatten sub_graph into parent_graph: first merge its Data/NetOutput
// boundary nodes, then move every remaining node up, recursing into
// unknown-shape PARTITIONEDCALL subgraphs. Finally drop the (now empty)
// subgraph from root_graph.
auto parent_node = sub_graph.GetParentNode(); | |||
GE_CHECK_NOTNULL(parent_node); | |||
GE_CHK_STATUS_RET(MergeInputNodes(sub_graph), | |||
"[Invoke][MergeInputNodes][%s] Failed to merge data nodes for subgraph", | |||
sub_graph.GetName().c_str()); | |||
GE_CHK_STATUS_RET(MergeNetOutputNode(sub_graph), | |||
"[Invoke][MergeNetOutputNode][%s] Failed to merge net output nodes for subgraph", | |||
sub_graph.GetName().c_str()); | |||
GELOGD("[%s] Done merging subgraph inputs and outputs successfully", sub_graph.GetName().c_str()); | |||
for (auto &sub_node : sub_graph.GetDirectNode()) { | |||
auto sub_op_type = sub_node->GetType(); | |||
// Boundary nodes were already merged above; do not move them up.
if (sub_op_type == DATA_TYPE || sub_op_type == NETOUTPUT) { | |||
continue; | |||
} | |||
if (sub_op_type == PARTITIONEDCALL) { | |||
auto sub_sub_graph = NodeUtils::GetSubgraph(*sub_node, kSubgraphIndex); | |||
GE_CHECK_NOTNULL(sub_sub_graph); | |||
// Unknown-shape nested subgraphs get flattened recursively; known-shape
// ones stay folded and fall through to the re-parenting below.
if (sub_sub_graph->GetGraphUnknownFlag()) { | |||
GE_CHK_STATUS_RET(UnfoldSubgraph(root_graph, parent_graph, *sub_sub_graph), | |||
"[Invoke][UnfoldSubgraph][%s] Failed to merge subgraph", | |||
sub_sub_graph->GetName().c_str()); | |||
continue; | |||
} | |||
} | |||
// Keep subgraph ownership consistent for nodes that carry subgraphs.
if (!sub_node->GetOpDesc()->GetSubgraphInstanceNames().empty()) { | |||
for (size_t i = 0; i < sub_node->GetOpDesc()->GetSubgraphInstanceNames().size(); ++i) { | |||
auto sub_sub_graph = NodeUtils::GetSubgraph(*sub_node, i); | |||
GE_CHECK_NOTNULL(sub_sub_graph); | |||
sub_sub_graph->SetParentGraph(parent_graph); | |||
} | |||
} | |||
parent_graph->AddNode(sub_node); | |||
GELOGD("[%s::%s] added to parent graph: [%s].", | |||
sub_graph.GetName().c_str(), | |||
sub_node->GetName().c_str(), | |||
parent_graph->GetName().c_str()); | |||
sub_node->SetOwnerComputeGraph(parent_graph); | |||
} | |||
GELOGD("[%s] Done merging subgraph. remove it from root graph", sub_graph.GetName().c_str()); | |||
root_graph->RemoveSubgraph(sub_graph.GetName()); | |||
return SUCCESS; | |||
} | |||
Status HybridModelBuilder::BuildOutputMapping(GraphItem &graph_item, | |||
const NodeItem &node_item, | |||
bool is_root_graph) { | |||
@@ -39,16 +39,11 @@ class HybridModelBuilder { | |||
private: | |||
static Status UpdateAnchorStatus(const NodePtr &node); | |||
static Status DoUnlinkDataAnchors(const OutDataAnchorPtr &out_data_anchor, const InDataAnchorPtr &in_data_anchor); | |||
static Status DoLinkDataAnchors(OutDataAnchorPtr &out_data_anchor, InDataAnchorPtr &in_data_anchor); | |||
static NodePtr GetPeerNode(const InDataAnchorPtr &in_data_anchor); | |||
static Status GetParentNodeOutputIndex(const OpDesc &op_desc, int index, uint32_t &out_index); | |||
static Status GetPeerNodeAcrossSubGraphs(const NodePtr &data_node, NodePtr &peer_node, int &peer_out_index); | |||
static Status HandleDtString(const GeTensor &tensor, void *var_addr); | |||
static Status MergeInputNodes(ComputeGraph &compute_graph); | |||
static Status MergeNetOutputNode(ComputeGraph &compute_graph); | |||
static Status UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeGraphPtr &merged_graph); | |||
static Status UnfoldSubgraph(ComputeGraphPtr &root_graph, ComputeGraphPtr &parent_graph, ComputeGraph &sub_graph); | |||
static Status BuildInputMapping(GraphItem &graph_item, | |||
std::vector<NodeItem *> &data_nodes, | |||
bool is_root_graph); | |||
@@ -81,6 +81,9 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { | |||
case aicpu::FWKAdapter::FWK_ADPT_EXT_TOPIC_TYPE: | |||
GE_CHK_STATUS_RET(ParseExtTopicType(aicpu_ext_info), "[Parse][ExtTopicType] failed."); | |||
break; | |||
case aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT: | |||
GE_CHK_STATUS_RET(ParseExtAsyncWait(aicpu_ext_info), "[Parse][ExtAsyncWait] failed."); | |||
break; | |||
default: | |||
GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", | |||
node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen); | |||
@@ -101,6 +104,22 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { | |||
return SUCCESS; | |||
} | |||
Status AicpuExtInfoHandler::ParseExtAsyncWait(AicpuExtInfo *aicpu_ext_info) {
  // Validate the async-wait ext segment and remember a pointer to its payload
  // (async_wait_), which UpdateEventId fills in later.
  const size_t expected_len = sizeof(AsyncWaitInfo);
  if (aicpu_ext_info->infoLen == expected_len) {
    async_wait_ = reinterpret_cast<AsyncWaitInfo *>(aicpu_ext_info->infoMsg);
    GELOGI("Node[%s] parse async wait info success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen);
    return SUCCESS;
  }
  // Length mismatch: the segment cannot hold exactly one AsyncWaitInfo record.
  REPORT_INNER_ERROR("E19999",
                     "Node[%s] parse ext async wait info failed as infoLen must be %zu but %u.",
                     node_name_.c_str(), expected_len, aicpu_ext_info->infoLen);
  GELOGE(ACL_ERROR_GE_PARAM_INVALID,
         "[Check][DataLen]Node[%s] parse ext async wait info failed as infoLen must be %zu but %u.",
         node_name_.c_str(), expected_len, aicpu_ext_info->infoLen);
  return ACL_ERROR_GE_PARAM_INVALID;
}
Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) { | |||
GE_IF_BOOL_EXEC(aicpu_ext_info->infoLen != sizeof(int32_t), | |||
REPORT_INNER_ERROR("E19999", "Node[%s] parse ext shape type failed as infoLen must be %zu but %u.", | |||
@@ -280,6 +299,17 @@ Status AicpuExtInfoHandler::UpdateSessionInfo(uint64_t session_id, uint64_t kern | |||
return SUCCESS; | |||
} | |||
Status AicpuExtInfoHandler::UpdateEventId(uint32_t event_id) {
  // Write the event to wait on into the async-wait ext segment located by
  // ParseExtAsyncWait; fails if that segment was never parsed.
  if (async_wait_ == nullptr) {
    REPORT_INNER_ERROR("E19999", "async_wait_ is nullptr.");
    GELOGE(FAILED, "[Check][async_wait_] async_wait_ is nullptr.");
    return FAILED;
  }
  async_wait_->waitId = event_id;
  // NOTE(review): 1 presumably selects event-style waiting — confirm against
  // the aicpu::FWKAdapter::AsyncWait definition.
  async_wait_->waitType = 1;
  return SUCCESS;
}
Status AicpuExtInfoHandler::UpdateSessionInfoSessionId(uint64_t session_id) { | |||
if (session_info_ == nullptr) { | |||
GELOGD("There is no session info in ext_info, no need update."); | |||
@@ -27,6 +27,7 @@ namespace ge { | |||
namespace hybrid { | |||
using AicpuShapeAndType = aicpu::FWKAdapter::ShapeAndType; | |||
using AicpuExtInfo = aicpu::FWKAdapter::ExtInfo; | |||
using AsyncWaitInfo = aicpu::FWKAdapter::AsyncWait; | |||
using AicpuSessionInfo = SessionInfo; | |||
class AicpuExtInfoHandler { | |||
@@ -59,6 +60,8 @@ class AicpuExtInfoHandler { | |||
Status UpdateExecuteMode(bool flag); | |||
Status UpdateEventId(uint32_t event_id); | |||
Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type); | |||
bool IsNeedRefreshIOAddr(); | |||
@@ -73,6 +76,7 @@ class AicpuExtInfoHandler { | |||
Status ParseExtBitMap(AicpuExtInfo *aicpu_ext_info); | |||
Status ParseExtUpdateAddr(AicpuExtInfo *aicpu_ext_info); | |||
Status ParseExtTopicType(AicpuExtInfo *aicpu_ext_info); | |||
Status ParseExtAsyncWait(AicpuExtInfo *aicpu_ext_info); | |||
static Status UpdateShapeAndType(const GeShape &shape, | |||
DataType data_type, | |||
@@ -90,6 +94,7 @@ class AicpuExtInfoHandler { | |||
const uint32_t output_num_; | |||
UnknowShapeOpType unknown_type_; | |||
AicpuSessionInfo *session_info_ = nullptr; | |||
AsyncWaitInfo *async_wait_ = nullptr; | |||
uint64_t *bit_map_ = nullptr; | |||
uint32_t *update_addr_ = nullptr; | |||
int32_t topic_type_flag_ = -1; | |||
@@ -22,6 +22,7 @@ | |||
#include "graph/utils/node_utils.h" | |||
#include "hybrid/executor/hybrid_execution_context.h" | |||
#include "hybrid/model/hybrid_model.h" | |||
#include "runtime/rt.h" | |||
namespace ge { | |||
namespace hybrid { | |||
@@ -33,6 +34,12 @@ const char *const kAicpuAllshape = "_AllShape"; | |||
REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_TF, AiCpuNodeExecutor); | |||
REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_CUSTOM, AiCpuNodeExecutor); | |||
// Release the runtime event created for blocking aicpu ops, if one exists.
AicpuNodeTaskBase::~AicpuNodeTaskBase() {
  if (rt_event_ == nullptr) {
    return;
  }
  // Best effort: a destructor cannot propagate a runtime failure.
  (void)rtEventDestroy(rt_event_);
}
Status AicpuNodeTaskBase::AllocTensorBuffer(size_t size, std::unique_ptr<TensorBuffer> &tensor_buffer) { | |||
auto allocator = NpuMemoryAllocator::GetAllocator(); | |||
GE_CHECK_NOTNULL(allocator); | |||
@@ -64,6 +71,13 @@ Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info, int64_ | |||
GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateSessionInfoSessionId(session_id), | |||
"[Update][SessionInfoSessionId] failed, session_id:%ld.", session_id); | |||
if (is_blocking_aicpu_op_) { | |||
if (UpdateEventIdForBlockingAicpuOp() != SUCCESS) { | |||
GELOGE(FAILED, "[Call][UpdateEventIdForBlockingAicpuOp] Call UpdateEventIdForBlockingAicpuOp failed"); | |||
return FAILED; | |||
} | |||
} | |||
// copy task args buf | |||
GE_CHK_STATUS_RET(AllocTensorBuffer(aicpu_ext_handle_.GetExtInfoLen(), ext_info_addr_dev_), | |||
"[Invoke][AllocTensorBuffer]Node[%s] alloc kernel_ext_info buf failed, size=%zu", | |||
@@ -230,6 +244,96 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||
return SUCCESS; | |||
} | |||
// Create a runtime event for this blocking aicpu op, cache it in rt_event_
// (released by the destructor), and write its id into the kernel ext-info so
// the device side can signal completion. Silently succeeds when the device
// does not support blocking aicpu ops.
Status AicpuNodeTaskBase::UpdateEventIdForBlockingAicpuOp() {
  bool support_blocking = false;
  if (CheckDeviceSupportBlockingAicpuOpProcess(support_blocking) != SUCCESS) {
    GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
    return FAILED;
  }
  if (!support_blocking) {
    GELOGD("Device not support blocking aicpu op process");
    return SUCCESS;
  }
  auto rt_err = rtEventCreateWithFlag(&rt_event_, RT_EVENT_WITH_FLAG);
  if (rt_err != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtEventCreateWithFlag failed for node:%s, ret:0x%X", node_name_.c_str(),
                      rt_err);
    GELOGE(RT_FAILED, "[Call][rtEventCreateWithFlag] failed for node:%s, ret:0x%X", node_name_.c_str(), rt_err);
    return RT_ERROR_TO_GE_STATUS(rt_err);
  }
  uint32_t event_id = 0U;
  rt_err = rtGetEventID(rt_event_, &event_id);
  if (rt_err != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtGetEventID failed for node:%s, ret:0x%X", node_name_.c_str(), rt_err);
    GELOGE(RT_FAILED, "[Call][rtGetEventID] failed for node:%s, ret:0x%X", node_name_.c_str(), rt_err);
    return RT_ERROR_TO_GE_STATUS(rt_err);
  }
  if (aicpu_ext_handle_.UpdateEventId(event_id) != SUCCESS) {
    REPORT_CALL_ERROR("E19999", "Update event id failed for node:%s.", node_name_.c_str());
    GELOGE(FAILED, "[Update][EventId] Update event id failed for node:%s", node_name_.c_str());
    return FAILED;
  }
  GELOGI("Update event_id=%u success", event_id);
  return SUCCESS;
}
// Query whether the current device supports the blocking aicpu op feature.
// On SUCCESS `is_support` holds the answer; runtime-API failures and
// out-of-range capability values are reported and returned as errors.
Status AicpuNodeTaskBase::CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support) {
  int32_t device_id = 0;
  auto rt_ret = rtGetDevice(&device_id);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][rtGetDevice] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  int32_t value = 0;
  rt_ret = rtGetDeviceCapability(device_id, FEATURE_TYPE_BLOCKING_OPERATOR, RT_MODULE_TYPE_AICPU, &value);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtGetDeviceCapability failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][rtGetDeviceCapability] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  // The capability must be one of the two documented values; anything else
  // means the query result is unreliable, so fail loudly instead of guessing.
  if (value != RT_AICPU_BLOCKING_OP_NOT_SUPPORT && value != RT_AICPU_BLOCKING_OP_SUPPORT) {
    REPORT_INNER_ERROR("E19999", "Value should be %d or %d but %d",
                       RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
    GELOGE(FAILED, "[Check][Value] Value should be %d or %d but %d",
           RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value);
    return FAILED;
  }
  // Fix: drop the redundant `? true : false` -- the comparison is already bool.
  is_support = (value == RT_AICPU_BLOCKING_OP_SUPPORT);
  return SUCCESS;
}
// After the kernel launch, make `stream` wait on the blocking-op event and
// then reset the event so it can be reused by the next launch.
// No-op (SUCCESS) when the device does not support blocking aicpu ops.
Status AicpuNodeTaskBase::DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream) {
  bool is_support = false;
  if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) {
    GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed");
    return FAILED;
  }
  if (!is_support) {
    GELOGD("Device not support blocking aicpu op process.");
    return SUCCESS;
  }
  GELOGD("Distribute queue task begin");
  // The event must have been created by UpdateEventIdForBlockingAicpuOp first.
  if (rt_event_ == nullptr) {
    REPORT_INNER_ERROR("E19999", "rt_event_ is nullptr");
    GELOGE(FAILED, "[Check][rt_event_] rt_event_ is nullptr");
    return FAILED;
  }
  auto rt_ret = rtStreamWaitEvent(stream, rt_event_);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", rt_ret);
    // Fix: name the failing API instead of the generic "[Call][RtApi]" text,
    // consistent with the other rt* error logs in this file.
    GELOGE(RT_FAILED, "[Call][rtStreamWaitEvent] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  rt_ret = rtEventReset(rt_event_, stream);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", rt_ret);
    GELOGE(RT_FAILED, "[Call][rtEventReset] failed, ret:0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  return SUCCESS;
}
Status AicpuTfNodeTask::InitForDependComputeTask() { | |||
if ((unknown_type_ != DEPEND_COMPUTE) || (node_item_->num_outputs == 0)) { | |||
GELOGD("Node[%s] type[%s] unknown_type is %d, output num is %d.", | |||
@@ -325,6 +429,9 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) { | |||
// init ext info | |||
uint64_t ext_session_id = model.GetSessionId(); | |||
const OpDescPtr op_desc = node_item_->GetOpDesc(); | |||
AttrUtils::GetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_); | |||
GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc->GetName().c_str(), is_blocking_aicpu_op_); | |||
GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id), "[Init][ExtInfo] failed for Node[%s].", | |||
node_name_.c_str()); | |||
GE_CHK_STATUS_RET(InitForDependComputeTask(), "[Init][DependComputeTask] failed for Node[%s].", node_name_.c_str()); | |||
@@ -642,6 +749,12 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) { | |||
kernel_buf_->GetSize(), flag, context.GetStream())); | |||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] End"); | |||
GELOGD("Node[%s] launch end.", node_name_.c_str()); | |||
if (is_blocking_aicpu_op_) { | |||
if (DistributeWaitTaskForAicpuBlockingOp(context.GetStream()) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); | |||
return FAILED; | |||
} | |||
} | |||
if (need_sync_) { | |||
GELOGD("[%s] Task needs sync", node_name_.c_str()); | |||
GE_CHK_STATUS_RET_NOLOG(context.Synchronize()); | |||
@@ -760,6 +873,8 @@ Status AicpuNodeTask::Init(const HybridModel &model) { | |||
return FAILED;); | |||
uint64_t ext_session_id = model.GetSessionId(); | |||
AttrUtils::GetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_); | |||
GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc->GetName().c_str(), is_blocking_aicpu_op_); | |||
GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id), | |||
"[Init][ExtInfo] failed for Node[%s].", node_name.c_str()); | |||
@@ -826,6 +941,12 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) { | |||
args_.get(), args_size_, | |||
nullptr, context.GetStream(), flag); | |||
GE_CHK_RT_RET(rt_ret); | |||
if (is_blocking_aicpu_op_) { | |||
if (DistributeWaitTaskForAicpuBlockingOp(context.GetStream()) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); | |||
return FAILED; | |||
} | |||
} | |||
GELOGD("Node[%s] launch task end.", node_name_.c_str()); | |||
return SUCCESS; | |||
} | |||
@@ -35,7 +35,7 @@ class AicpuNodeTaskBase : public NodeTask { | |||
node_item->num_outputs, | |||
node_item->shape_inference_type) {} | |||
~AicpuNodeTaskBase() override = default; | |||
~AicpuNodeTaskBase() override; | |||
using NodeTask::Init; | |||
@@ -61,6 +61,10 @@ class AicpuNodeTaskBase : public NodeTask { | |||
static Status AllocTensorBuffer(size_t size, std::unique_ptr<TensorBuffer> &tensor_buffer); | |||
Status DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream); | |||
Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support); | |||
Status UpdateEventIdForBlockingAicpuOp(); | |||
protected: | |||
const NodeItem *node_item_; | |||
// just reference. | |||
@@ -78,6 +82,10 @@ class AicpuNodeTaskBase : public NodeTask { | |||
// ext info addr, device mem | |||
std::unique_ptr<TensorBuffer> ext_info_addr_dev_; | |||
// for blocking aicpu op | |||
bool is_blocking_aicpu_op_ = false; | |||
rtEvent_t rt_event_ = nullptr; | |||
}; | |||
class AicpuTfNodeTask : public AicpuNodeTaskBase { | |||
@@ -89,7 +89,8 @@ map<string, DataType> kDataTypeDict = { | |||
{"float", DT_FLOAT}, | |||
{"float32", DT_FLOAT}, | |||
{"double", DT_DOUBLE}, | |||
{"complex64", DT_COMPLEX64} | |||
{"complex64", DT_COMPLEX64}, | |||
{"complex128", DT_COMPLEX128} | |||
}; | |||
map<string, Format> kFormatDict = { | |||
@@ -154,12 +154,16 @@ Status OpsKernelBuilderManager::CalcOpRunningParam(Node &node) const { | |||
return SUCCESS; | |||
} | |||
Status OpsKernelBuilderManager::GenerateTask(const Node &node, | |||
RunContext &context, | |||
std::vector<domi::TaskDef> &tasks) const { | |||
Status OpsKernelBuilderManager::GenerateTask(const Node &node, RunContext &context, std::vector<domi::TaskDef> &tasks, | |||
bool atomic_engine_flag) const { | |||
auto op_desc = node.GetOpDesc(); | |||
GE_CHECK_NOTNULL(op_desc); | |||
const std::string &lib_name = op_desc->GetOpKernelLibName(); | |||
std::string lib_name; | |||
if (atomic_engine_flag) { | |||
lib_name = op_desc->GetOpKernelLibName(); | |||
} else { | |||
(void)AttrUtils::GetStr(op_desc, ATTR_NAME_COMPOUND_ENGINE_KERNEL_LIB_NAME, lib_name); | |||
} | |||
auto it = ops_kernel_builders_.find(lib_name); | |||
if (it == ops_kernel_builders_.end()) { | |||
GELOGE(INTERNAL_ERROR, "[Find][LibName]fail for libName = %s, node:%s", lib_name.c_str(), | |||
@@ -43,8 +43,8 @@ class GE_FUNC_VISIBILITY OpsKernelBuilderManager { | |||
Status CalcOpRunningParam(Node &node) const; | |||
Status GenerateTask(const Node &node, RunContext &context, | |||
std::vector<domi::TaskDef> &tasks) const; | |||
Status GenerateTask(const Node &node, RunContext &context, std::vector<domi::TaskDef> &tasks, | |||
bool atomic_engine_flag = true) const; | |||
private: | |||
OpsKernelBuilderManager() = default; | |||
@@ -24,6 +24,7 @@ const char *const kInitialize = "Initialize"; | |||
const char *const kGetOpsKernelInfoStores = "GetOpsKernelInfoStores"; | |||
const char *const kGetGraphOptimizerObjs = "GetGraphOptimizerObjs"; | |||
const char *const kFinalize = "Finalize"; | |||
const char *const kGetCompoundEngineContains = "GetCompoundEngineContains"; | |||
std::mutex ops_kernel_info_mutex; | |||
} // namespace | |||
@@ -35,6 +36,12 @@ OpsKernelManager::OpsKernelManager() | |||
OpsKernelManager::~OpsKernelManager() { | |||
graph_optimizers_.clear(); | |||
ops_kernel_store_.clear(); | |||
atomic_graph_optimizers_.clear(); | |||
compound_graph_optimizers_.clear(); | |||
atomic_graph_optimizers_by_priority_.clear(); | |||
atomic_first_optimizers_by_priority_.clear(); | |||
compound_first_optimizers_by_priority_.clear(); | |||
compound_engine_contains_.clear(); | |||
ops_kernel_info_.clear(); | |||
} | |||
@@ -70,53 +77,48 @@ Status OpsKernelManager::Initialize(const map<string, string> &options_const) { | |||
GELOGI("OPTION_EXEC_EXTERN_PLUGIN_PATH=%s.", extern_engine_path.c_str()); | |||
op_tiling_manager_.LoadSo(); | |||
ret = plugin_manager_.LoadSo(extern_engine_path, func_check_list); | |||
if (ret == SUCCESS) { | |||
initialize_ = options; | |||
Status rst0 = plugin_manager_.InvokeAll<map<string, string> &, Status>(kInitialize, initialize_); | |||
if (rst0 == FAILED) { | |||
GELOGE(GE_OPS_GET_NO_VALID_SO, "[Invoke][OpsKernelInfo]PluginManager InvokeAll failed."); | |||
REPORT_INNER_ERROR("E19999", "PluginManager InvokeAll failed."); | |||
return GE_OPS_GET_NO_VALID_SO; | |||
} | |||
Status rst1 = | |||
plugin_manager_.InvokeAll<map<string, OpsKernelInfoStorePtr> &>(kGetOpsKernelInfoStores, ops_kernel_store_); | |||
if (rst1 != SUCCESS) { | |||
GELOGW("Initialize OpsKernelInfo failed."); | |||
} | |||
Status rst2 = | |||
plugin_manager_.InvokeAll<map<string, GraphOptimizerPtr> &>(kGetGraphOptimizerObjs, graph_optimizers_); | |||
if (rst2 != SUCCESS) { | |||
GELOGW("Initialize GraphOptimizerObjs failed."); | |||
} | |||
ret = CheckPluginPtr(); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
ret = InitOpKernelInfoStores(options); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
InitOpsKernelInfo(); | |||
ret = InitGraphOptimzers(options); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
ret = InitGraphOptimizerPriority(); | |||
if ((ret != SUCCESS)) { | |||
GELOGE(ret, "[Init][GraphOptimizerPriority] failed."); | |||
REPORT_CALL_ERROR("E19999", "InitGraphOptimizerPriority failed."); | |||
return ret; | |||
} | |||
init_flag_ = true; | |||
return SUCCESS; | |||
} else { | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[Check][SoFile] not find any valid so file."); | |||
REPORT_INNER_ERROR("E19999", "OpsKernelManager::Initialize failed for not find any valid so file."); | |||
return ret; | |||
} | |||
initialize_ = options; | |||
if (plugin_manager_.InvokeAll<map<string, string> &, Status>(kInitialize, initialize_) == FAILED) { | |||
GELOGE(GE_OPS_GET_NO_VALID_SO, "[Invoke][OpsKernelInfo]PluginManager InvokeAll failed."); | |||
REPORT_INNER_ERROR("E19999", "PluginManager InvokeAll failed."); | |||
return GE_OPS_GET_NO_VALID_SO; | |||
} | |||
if (plugin_manager_.InvokeAll<map<string, OpsKernelInfoStorePtr> &>(kGetOpsKernelInfoStores, | |||
ops_kernel_store_) != SUCCESS) { | |||
GELOGW("Initialize OpsKernelInfo failed."); | |||
} | |||
if (plugin_manager_.InvokeAll<map<string, GraphOptimizerPtr> &>(kGetGraphOptimizerObjs, | |||
graph_optimizers_) != SUCCESS) { | |||
GELOGW("Initialize GraphOptimizerObjs failed."); | |||
} | |||
plugin_manager_. | |||
OptionalInvokeAll<std::map<std::string, std::set<std::string>> &, std::map<std::string, std::string> &>( | |||
kGetCompoundEngineContains, compound_engine_contains_, compound_engine_2_kernel_lib_name_); | |||
ret = CheckPluginPtr(); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
ret = InitOpKernelInfoStores(options); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
InitOpsKernelInfo(); | |||
ret = InitGraphOptimizers(options); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
ClassifyGraphOptimizers(); | |||
InitGraphOptimizerPriority(); | |||
init_flag_ = true; | |||
return SUCCESS; | |||
} | |||
void OpsKernelManager::GetExternalEnginePath(std::string &extern_engine_path, | |||
@@ -264,7 +266,7 @@ void OpsKernelManager::InitOpsKernelInfo() { | |||
REPORT_INNER_ERROR("E19999", "InitOpsKernelInfo failed for new GELib."); | |||
return; | |||
} | |||
// sort opinfo of ops_kernel_info_ | |||
// sort op_info of ops_kernel_info_ | |||
for (auto &it : ops_kernel_info_) { | |||
if (it.second.empty()) { | |||
continue; | |||
@@ -293,31 +295,30 @@ void OpsKernelManager::InitOpsKernelInfo() { | |||
GELOGI("Init opsKernelInfo finished, size is %zu", ops_kernel_info_.size()); | |||
} | |||
Status OpsKernelManager::InitGraphOptimzers(const map<string, string> &options) { | |||
Status OpsKernelManager::InitGraphOptimizers(const map<string, string> &options) { | |||
GELOGI("Init graph optimizers options count %zu", options.size()); | |||
for (const auto &option : options) { | |||
GELOGI("Init graph optimizers option %s: %s", option.first.c_str(), option.second.c_str()); | |||
} | |||
GELOGI("The number of GraphOptimzerObjs are %zu.", graph_optimizers_.size()); | |||
GELOGI("The number of GraphOptimizerObjs are %zu.", graph_optimizers_.size()); | |||
for (const auto &it : graph_optimizers_) { | |||
GELOGI("GraphOptimzer name: %s.", (it.first).c_str()); | |||
GELOGI("GraphOptimizer name: %s.", (it.first).c_str()); | |||
GraphOptimizerAttribute attrs; | |||
GE_CHK_STATUS_RET(it.second->GetAttributes(attrs)) | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if (instance_ptr == nullptr) { | |||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Get][GELib]malloc instance_ptr failed."); | |||
REPORT_INNER_ERROR("E19999", "InitGraphOptimzers failed for new GELib."); | |||
REPORT_INNER_ERROR("E19999", "InitGraphOptimizers failed for new GELib."); | |||
return GE_CLI_GE_NOT_INITIALIZED; | |||
} | |||
if (!instance_ptr->DNNEngineManagerObj().IsEngineRegistered(attrs.engineName)) { | |||
GELOGW("Engine: %s is not registered.", attrs.engineName.c_str()); | |||
continue; | |||
} | |||
Status ret = it.second->Initialize(options); | |||
if (ret != SUCCESS) { | |||
GELOGE(GE_OPS_GRAPH_OPTIMIZER_INIT_FAILED, | |||
"[Init][GraphOptimzer]GraphOptimzer: %s initialize failed.", (it.first).c_str()); | |||
REPORT_CALL_ERROR("E19999", "InitGraphOptimzers failed. %s initialize failed.", (it.first).c_str()); | |||
if (it.second->Initialize(options) != SUCCESS) { | |||
GELOGE(GE_OPS_GRAPH_OPTIMIZER_INIT_FAILED, | |||
"[Init][GraphOptimizer] GraphOptimizer: %s initialize failed.", (it.first).c_str()); | |||
REPORT_CALL_ERROR("E19999", "InitGraphOptimizers failed. %s initialize failed.", (it.first).c_str()); | |||
return GE_OPS_GRAPH_OPTIMIZER_INIT_FAILED; | |||
} | |||
} | |||
@@ -340,11 +341,11 @@ Status OpsKernelManager::Finalize() { | |||
} | |||
} | |||
for (auto iter = graph_optimizers_.begin(); iter != graph_optimizers_.end(); ++iter) { | |||
GELOGI("GraphOptimzers finalize, name: %s.", (iter->first).c_str()); | |||
GELOGI("GraphOptimizer finalize, name: %s.", (iter->first).c_str()); | |||
Status status = iter->second->Finalize(); | |||
if (status != SUCCESS) { | |||
GELOGE(status, "[Check][Status]GraphOptimzers finalize failed, name: %s.", (iter->first).c_str()); | |||
REPORT_CALL_ERROR("E19999", "GraphOptimzers finalize failed, name: %s.", (iter->first).c_str()); | |||
GELOGE(status, "[Check][Status] GraphOptimizer finalize failed, name: %s.", (iter->first).c_str()); | |||
REPORT_CALL_ERROR("E19999", "GraphOptimizer finalize failed, name: %s.", (iter->first).c_str()); | |||
return status; | |||
} | |||
} | |||
@@ -398,8 +399,12 @@ const map<string, OpsKernelInfoStorePtr> &OpsKernelManager::GetAllOpsKernelInfoS | |||
const map<string, GraphOptimizerPtr> &OpsKernelManager::GetAllGraphOptimizerObjs() const { return graph_optimizers_; } | |||
const vector<pair<string, GraphOptimizerPtr>> &OpsKernelManager::GetAllGraphOptimizerObjsByPriority() const { | |||
return graph_optimizers_by_priority_; | |||
// Return the ordered optimizer list, with atomic or compound engines first
// depending on the requested precedence.
const vector<pair<string, GraphOptimizerPtr>> &OpsKernelManager::GetAllGraphOptimizerObjsByPriority(
    bool atomic_first_flag) const {
  return atomic_first_flag ? atomic_first_optimizers_by_priority_ : compound_first_optimizers_by_priority_;
}
void OpsKernelManager::GetGraphOptimizerByEngine(const std::string &engine_name, | |||
@@ -407,11 +412,11 @@ void OpsKernelManager::GetGraphOptimizerByEngine(const std::string &engine_name, | |||
for (const auto &it : graph_optimizers_) { | |||
GraphOptimizerAttribute attrs; | |||
if (it.second->GetAttributes(attrs) != SUCCESS) { | |||
GELOGW("Get GraphOptimzer name: %s attributes failed.", (it.first).c_str()); | |||
GELOGW("Get GraphOptimizer name: %s attributes failed.", (it.first).c_str()); | |||
continue; | |||
} | |||
if (attrs.engineName == engine_name) { | |||
GELOGD("GetGraphOptimizerByEngine GraphOptimzer name: %s, engineName: %s", (it.first).c_str(), | |||
GELOGD("GetGraphOptimizerByEngine GraphOptimizer name: %s, engineName: %s", (it.first).c_str(), | |||
attrs.engineName.c_str()); | |||
graph_optimizer.push_back(it.second); | |||
} | |||
@@ -428,39 +433,62 @@ bool OpsKernelManager::GetEnableAICPUFlag() const { return enable_aicpu_flag_; } | |||
bool OpsKernelManager::GetEnablePluginFlag() const { return (enable_fe_flag_ || enable_aicpu_flag_); } | |||
Status OpsKernelManager::InitGraphOptimizerPriority() { | |||
void OpsKernelManager::ClassifyGraphOptimizers() { | |||
if (compound_engine_contains_.empty()) { | |||
atomic_graph_optimizers_ = graph_optimizers_; | |||
compound_graph_optimizers_.clear(); | |||
return; | |||
} | |||
for (const auto &item : graph_optimizers_) { | |||
if (compound_engine_contains_.find(item.first) != compound_engine_contains_.end()) { | |||
GELOGI("Engine %s is a compound engine.", item.first.c_str()); | |||
compound_graph_optimizers_.emplace(item); | |||
} else { | |||
GELOGI("Engine %s is an atomic engine.", item.first.c_str()); | |||
atomic_graph_optimizers_.emplace(item); | |||
} | |||
} | |||
} | |||
void OpsKernelManager::InitGraphOptimizerPriority() { | |||
string priority_conf_path = "plugin/opskernel/optimizer_priority.pbtxt"; | |||
string path = PluginManager::GetPath(); | |||
path.append(priority_conf_path); | |||
optimizers::Priority optimizerPriority; | |||
bool ret = ReadProtoFromText(path.c_str(), &optimizerPriority); | |||
if (!ret) { | |||
if (!ReadProtoFromText(path.c_str(), &optimizerPriority)) { | |||
GELOGW("Read priority file failed. Follow loading sequence."); | |||
return SUCCESS; | |||
return; | |||
} | |||
auto priorities = optimizerPriority.optimizer(); | |||
if (priorities.empty()) { | |||
GELOGI("No priority file config. Follow loading sequence."); | |||
return SUCCESS; | |||
return; | |||
} | |||
// sort optimizer map by priority | |||
std::stringstream priority_seq; | |||
for (const auto optimizer_name : priorities) { | |||
auto name_to_optimizer_pair = graph_optimizers_.find(optimizer_name); | |||
if (name_to_optimizer_pair != graph_optimizers_.end()) { | |||
graph_optimizers_by_priority_.emplace_back(*name_to_optimizer_pair); | |||
auto name_to_optimizer_pair = atomic_graph_optimizers_.find(optimizer_name); | |||
if (name_to_optimizer_pair != atomic_graph_optimizers_.end()) { | |||
atomic_graph_optimizers_by_priority_.emplace_back(*name_to_optimizer_pair); | |||
priority_seq << optimizer_name.c_str() << ' '; | |||
} else { | |||
GELOGW("Unknown optimizer %s show up in priority config file. Please check.", optimizer_name.c_str()); | |||
} | |||
} | |||
GELOGI("Graph Optimizers priority initialized. The sequence will follow : %s.", priority_seq.str().c_str()); | |||
return SUCCESS; | |||
GELOGI("Atomic graph Optimizers priority initialized. The sequence will follow : %s.", priority_seq.str().c_str()); | |||
atomic_first_optimizers_by_priority_ = atomic_graph_optimizers_by_priority_; | |||
for (const auto &item : compound_graph_optimizers_) { | |||
atomic_first_optimizers_by_priority_.emplace_back(std::make_pair(item.first, item.second)); | |||
compound_first_optimizers_by_priority_.emplace_back(std::make_pair(item.first, item.second)); | |||
} | |||
for (const auto &item : atomic_graph_optimizers_by_priority_) { | |||
compound_first_optimizers_by_priority_.emplace_back(std::make_pair(item.first, item.second)); | |||
} | |||
} | |||
Status OpsKernelManager::FinalizeOpsKernel() { | |||
GELOGI("ge invoke ops kernal finalize."); | |||
GELOGI("ge invoke ops kernel finalize."); | |||
Status ret = plugin_manager_.InvokeAll<Status>(kFinalize); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[Finalize][Check][Status] invoke Fe finalize failed."); | |||
@@ -18,10 +18,12 @@ | |||
#define GE_OPSKERNEL_MANAGER_OPS_KERNEL_MANAGER_H_ | |||
#include <map> | |||
#include <set> | |||
#include <memory> | |||
#include <string> | |||
#include <vector> | |||
#include <mutex> | |||
#include <set> | |||
#include "framework/common/debug/log.h" | |||
#include "common/ge/plugin_manager.h" | |||
@@ -61,7 +63,25 @@ class GE_FUNC_VISIBILITY OpsKernelManager { | |||
const map<string, GraphOptimizerPtr> &GetAllGraphOptimizerObjs() const; | |||
// get all graph_optimizer by priority | |||
const vector<pair<string, GraphOptimizerPtr>> &GetAllGraphOptimizerObjsByPriority() const; | |||
const vector<pair<string, GraphOptimizerPtr>> &GetAllGraphOptimizerObjsByPriority(bool atomic_first_flag = true) const; | |||
// get atomic_engine graph_optimizer by priority | |||
const vector<pair<string, GraphOptimizerPtr>> &GetAtomicGraphOptimizerObjsByPriority() const { | |||
return atomic_graph_optimizers_by_priority_; | |||
} | |||
// get compound_engine graph_optimizer | |||
const map<string, GraphOptimizerPtr> &GetCompoundGraphOptimizerObjs() const { | |||
return compound_graph_optimizers_; | |||
} | |||
const map<string, std::set<std::string>> &GetCompoundEngineContains() const { | |||
return compound_engine_contains_; | |||
} | |||
const std::map<std::string, std::string> &GetCompoundEngineKernelLibName() const { | |||
return compound_engine_2_kernel_lib_name_; | |||
} | |||
// get subgraphOptimizer by engine name | |||
void GetGraphOptimizerByEngine(const std::string &engine_name, vector<GraphOptimizerPtr> &graph_optimizer); | |||
@@ -93,15 +113,15 @@ class GE_FUNC_VISIBILITY OpsKernelManager { | |||
void InitOpsKernelInfo(); | |||
Status InitGraphOptimzers(const map<string, string> &options); | |||
Status InitGraphOptimizers(const map<string, string> &options); | |||
Status InitPluginOptions(const map<string, string> &options); | |||
Status ParsePluginOptions(const map<string, string> &options, const string &plugin_name, bool &enable_flag); | |||
Status LoadGEGraphOptimizer(map<string, GraphOptimizerPtr>& graphOptimizer); | |||
void ClassifyGraphOptimizers(); | |||
Status InitGraphOptimizerPriority(); | |||
void InitGraphOptimizerPriority(); | |||
// Finalize other ops kernel resource | |||
Status FinalizeOpsKernel(); | |||
@@ -112,8 +132,20 @@ class GE_FUNC_VISIBILITY OpsKernelManager { | |||
map<string, OpsKernelInfoStorePtr> ops_kernel_store_{}; | |||
// graph_optimizer | |||
map<string, GraphOptimizerPtr> graph_optimizers_{}; | |||
// ordered graph_optimzer | |||
vector<pair<string, GraphOptimizerPtr>> graph_optimizers_by_priority_{}; | |||
// compound_graph_optimizer | |||
map<string, GraphOptimizerPtr> compound_graph_optimizers_{}; | |||
// atomic_graph_optimizer | |||
map<string, GraphOptimizerPtr> atomic_graph_optimizers_{}; | |||
// ordered atomic_graph_optimizer | |||
vector<pair<string, GraphOptimizerPtr>> atomic_graph_optimizers_by_priority_{}; | |||
// atomic_first graph_optimizer | |||
vector<pair<string, GraphOptimizerPtr>> atomic_first_optimizers_by_priority_{}; | |||
// compound_first graph_optimizer | |||
vector<pair<string, GraphOptimizerPtr>> compound_first_optimizers_by_priority_{}; | |||
// {compound_engine, {containing atomic engines}} | |||
std::map<std::string, std::set<std::string>> compound_engine_contains_{}; | |||
// {compound_engine, compound_engine_kernel_lib_name} | |||
std::map<std::string, std::string> compound_engine_2_kernel_lib_name_{}; | |||
// opsKernelInfo | |||
map<string, vector<OpInfo>> ops_kernel_info_{}; | |||
@@ -16,9 +16,7 @@ | |||
#include "plugin/engine/dnnengines.h" | |||
#include <map> | |||
#include <string> | |||
#include <vector> | |||
namespace ge { | |||
AICoreDNNEngine::AICoreDNNEngine(const std::string &engine_name) { | |||
@@ -29,14 +27,6 @@ AICoreDNNEngine::AICoreDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_output_format = FORMAT_RESERVED; | |||
} | |||
AICoreDNNEngine::AICoreDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
Status AICoreDNNEngine::Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status AICoreDNNEngine::Finalize() { return SUCCESS; } | |||
void AICoreDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } | |||
VectorCoreDNNEngine::VectorCoreDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_name = engine_name; | |||
engine_attribute_.compute_cost = COST_1; | |||
@@ -45,14 +35,6 @@ VectorCoreDNNEngine::VectorCoreDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_output_format = FORMAT_RESERVED; | |||
} | |||
VectorCoreDNNEngine::VectorCoreDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
Status VectorCoreDNNEngine::Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status VectorCoreDNNEngine::Finalize() { return SUCCESS; } | |||
void VectorCoreDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } | |||
AICpuDNNEngine::AICpuDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_name = engine_name; | |||
engine_attribute_.compute_cost = COST_2; | |||
@@ -61,14 +43,6 @@ AICpuDNNEngine::AICpuDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_output_format = FORMAT_RESERVED; | |||
} | |||
AICpuDNNEngine::AICpuDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
Status AICpuDNNEngine::Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status AICpuDNNEngine::Finalize() { return SUCCESS; } | |||
void AICpuDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } | |||
AICpuTFDNNEngine::AICpuTFDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_name = engine_name; | |||
engine_attribute_.compute_cost = COST_3; | |||
@@ -77,28 +51,12 @@ AICpuTFDNNEngine::AICpuTFDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_output_format = FORMAT_RESERVED; | |||
} | |||
AICpuTFDNNEngine::AICpuTFDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
Status AICpuTFDNNEngine::Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status AICpuTFDNNEngine::Finalize() { return SUCCESS; } | |||
void AICpuTFDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } | |||
// GE-local engine descriptor. Only the name is set; compute_cost keeps its
// default, unlike the AICore/AICpu engines above which assign an explicit COST_*.
GeLocalDNNEngine::GeLocalDNNEngine(const std::string &engine_name) {
  engine_attribute_.engine_name = engine_name;
  // FORMAT_RESERVED presumably means "no format constraint" -- TODO confirm.
  engine_attribute_.engine_input_format = FORMAT_RESERVED;
  engine_attribute_.engine_output_format = FORMAT_RESERVED;
}
GeLocalDNNEngine::GeLocalDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
Status GeLocalDNNEngine::Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status GeLocalDNNEngine::Finalize() { return SUCCESS; } | |||
void GeLocalDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } | |||
HostCpuDNNEngine::HostCpuDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_name = engine_name; | |||
engine_attribute_.compute_cost = COST_10; | |||
@@ -107,39 +65,21 @@ HostCpuDNNEngine::HostCpuDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_output_format = FORMAT_RESERVED; | |||
} | |||
HostCpuDNNEngine::HostCpuDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
Status HostCpuDNNEngine::Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status HostCpuDNNEngine::Finalize() { return SUCCESS; } | |||
void HostCpuDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } | |||
RtsDNNEngine::RtsDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_name = engine_name; | |||
engine_attribute_.engine_input_format = FORMAT_RESERVED; | |||
engine_attribute_.engine_output_format = FORMAT_RESERVED; | |||
} | |||
RtsDNNEngine::RtsDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
Status RtsDNNEngine::Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status RtsDNNEngine::Finalize() { return SUCCESS; } | |||
void RtsDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } | |||
HcclDNNEngine::HcclDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_name = engine_name; | |||
engine_attribute_.engine_input_format = FORMAT_RESERVED; | |||
engine_attribute_.engine_output_format = FORMAT_RESERVED; | |||
} | |||
HcclDNNEngine::HcclDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
Status HcclDNNEngine::Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status HcclDNNEngine::Finalize() { return SUCCESS; } | |||
void HcclDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } | |||
FftsPlusDNNEngine::FftsPlusDNNEngine(const std::string &engine_name) { | |||
engine_attribute_.engine_name = engine_name; | |||
engine_attribute_.engine_input_format = FORMAT_RESERVED; | |||
engine_attribute_.engine_output_format = FORMAT_RESERVED; | |||
} | |||
} // namespace ge |
@@ -27,123 +27,66 @@ | |||
namespace ge { | |||
class GE_FUNC_VISIBILITY AICoreDNNEngine : public DNNEngine { | |||
public: | |||
AICoreDNNEngine() = default; | |||
explicit AICoreDNNEngine(const std::string &engine_name); | |||
explicit AICoreDNNEngine(const DNNEngineAttribute &attrs); | |||
~AICoreDNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options); | |||
Status Finalize(); | |||
void GetAttributes(DNNEngineAttribute &attr) const; | |||
private: | |||
DNNEngineAttribute engine_attribute_; | |||
explicit AICoreDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~AICoreDNNEngine() override = default; | |||
}; | |||
class GE_FUNC_VISIBILITY VectorCoreDNNEngine : public DNNEngine { | |||
public: | |||
VectorCoreDNNEngine() = default; | |||
explicit VectorCoreDNNEngine(const std::string &engine_name); | |||
explicit VectorCoreDNNEngine(const DNNEngineAttribute &attrs); | |||
~VectorCoreDNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options); | |||
Status Finalize(); | |||
void GetAttributes(DNNEngineAttribute &attr) const; | |||
private: | |||
DNNEngineAttribute engine_attribute_; | |||
explicit VectorCoreDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~VectorCoreDNNEngine() override = default; | |||
}; | |||
class GE_FUNC_VISIBILITY AICpuDNNEngine : public DNNEngine { | |||
public: | |||
AICpuDNNEngine() = default; | |||
explicit AICpuDNNEngine(const std::string &engine_name); | |||
explicit AICpuDNNEngine(const DNNEngineAttribute &attrs); | |||
~AICpuDNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options); | |||
Status Finalize(); | |||
void GetAttributes(DNNEngineAttribute &attr) const; | |||
private: | |||
DNNEngineAttribute engine_attribute_; | |||
explicit AICpuDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~AICpuDNNEngine() override = default; | |||
}; | |||
class GE_FUNC_VISIBILITY AICpuTFDNNEngine : public DNNEngine { | |||
public: | |||
AICpuTFDNNEngine() = default; | |||
explicit AICpuTFDNNEngine(const std::string &engine_name); | |||
explicit AICpuTFDNNEngine(const DNNEngineAttribute &attrs); | |||
~AICpuTFDNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options); | |||
Status Finalize(); | |||
void GetAttributes(DNNEngineAttribute &attr) const; | |||
private: | |||
DNNEngineAttribute engine_attribute_; | |||
explicit AICpuTFDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~AICpuTFDNNEngine() override = default; | |||
}; | |||
class GE_FUNC_VISIBILITY GeLocalDNNEngine : public DNNEngine { | |||
public: | |||
GeLocalDNNEngine() = default; | |||
explicit GeLocalDNNEngine(const std::string &engine_name); | |||
explicit GeLocalDNNEngine(const DNNEngineAttribute &attrs); | |||
~GeLocalDNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options); | |||
Status Finalize(); | |||
void GetAttributes(DNNEngineAttribute &attr) const; | |||
private: | |||
DNNEngineAttribute engine_attribute_; | |||
explicit GeLocalDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~GeLocalDNNEngine() override = default; | |||
}; | |||
class GE_FUNC_VISIBILITY HostCpuDNNEngine : public DNNEngine { | |||
public: | |||
HostCpuDNNEngine() = default; | |||
explicit HostCpuDNNEngine(const std::string &engine_name); | |||
explicit HostCpuDNNEngine(const DNNEngineAttribute &attrs); | |||
~HostCpuDNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options); | |||
Status Finalize(); | |||
void GetAttributes(DNNEngineAttribute &attr) const; | |||
private: | |||
DNNEngineAttribute engine_attribute_; | |||
explicit HostCpuDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~HostCpuDNNEngine() override = default; | |||
}; | |||
class GE_FUNC_VISIBILITY RtsDNNEngine : public DNNEngine { | |||
public: | |||
RtsDNNEngine() = default; | |||
explicit RtsDNNEngine(const std::string &engine_name); | |||
explicit RtsDNNEngine(const DNNEngineAttribute &attrs); | |||
~RtsDNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options); | |||
Status Finalize(); | |||
void GetAttributes(DNNEngineAttribute &attr) const; | |||
private: | |||
DNNEngineAttribute engine_attribute_; | |||
explicit RtsDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~RtsDNNEngine() override = default; | |||
}; | |||
class GE_FUNC_VISIBILITY HcclDNNEngine : public DNNEngine { | |||
public: | |||
HcclDNNEngine() = default; | |||
explicit HcclDNNEngine(const std::string &engine_name); | |||
explicit HcclDNNEngine(const DNNEngineAttribute &attrs); | |||
~HcclDNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options); | |||
Status Finalize(); | |||
void GetAttributes(DNNEngineAttribute &attr) const; | |||
explicit HcclDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~HcclDNNEngine() override = default; | |||
}; | |||
private: | |||
DNNEngineAttribute engine_attribute_; | |||
class GE_FUNC_VISIBILITY FftsPlusDNNEngine : public DNNEngine { | |||
public: | |||
explicit FftsPlusDNNEngine(const std::string &engine_name); | |||
explicit FftsPlusDNNEngine(const DNNEngineAttribute &attrs) : DNNEngine(attrs) {} | |||
~FftsPlusDNNEngine() override = default; | |||
}; | |||
} // namespace ge | |||
#endif // GE_PLUGIN_ENGINE_DNNENGINES_H_ |
@@ -63,7 +63,13 @@ void RegisterAiCoreEngine() { | |||
const std::string ai_core = "AIcoreEngine"; | |||
std::vector<std::string> mem_type_aicore; | |||
mem_type_aicore.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
DNNEngineAttribute attr_aicore = {ai_core, mem_type_aicore, COST_0, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; | |||
DNNEngineAttribute attr_aicore = { ai_core, | |||
mem_type_aicore, | |||
COST_0, | |||
DEVICE, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
true }; | |||
DNNEnginePtr aicore_engine_ptr = MakeShared<AICoreDNNEngine>(attr_aicore); | |||
if (aicore_engine_ptr == nullptr) { | |||
GELOGE(ge::FAILED, "[Register][AiCoreEngine] failed, as malloc shared_ptr failed."); | |||
@@ -79,8 +85,13 @@ void RegisterVectorEngine() { | |||
const std::string vector_core = "VectorEngine"; | |||
std::vector<std::string> mem_type_aivcore; | |||
mem_type_aivcore.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
DNNEngineAttribute attr_vector_core = {vector_core, mem_type_aivcore, COST_1, | |||
DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; | |||
DNNEngineAttribute attr_vector_core = { vector_core, | |||
mem_type_aivcore, | |||
COST_1, | |||
DEVICE, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
true }; | |||
DNNEnginePtr vectorcore_engine_ptr = MakeShared<VectorCoreDNNEngine>(attr_vector_core); | |||
if (vectorcore_engine_ptr == nullptr) { | |||
GELOGE(ge::FAILED, "[Register][VectorEngine] failed, as malloc shared_ptr failed."); | |||
@@ -97,7 +108,13 @@ void RegisterAiCpuEngine() { | |||
std::vector<std::string> mem_type_aicpu; | |||
mem_type_aicpu.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
DNNEngineAttribute attr_aicpu = {vm_aicpu, mem_type_aicpu, COST_2, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; | |||
DNNEngineAttribute attr_aicpu = { vm_aicpu, | |||
mem_type_aicpu, | |||
COST_2, | |||
DEVICE, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
true }; | |||
DNNEnginePtr vm_engine_ptr = MakeShared<AICpuDNNEngine>(attr_aicpu); | |||
if (vm_engine_ptr == nullptr) { | |||
@@ -115,8 +132,13 @@ void RegisterAiCpuTFEngine() { | |||
std::vector<std::string> mem_type_aicpu_tf; | |||
mem_type_aicpu_tf.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
DNNEngineAttribute attr_aicpu_tf = {vm_aicpu_tf, mem_type_aicpu_tf, COST_3, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; | |||
DNNEngineAttribute attr_aicpu_tf = { vm_aicpu_tf, | |||
mem_type_aicpu_tf, | |||
COST_3, | |||
DEVICE, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
true }; | |||
DNNEnginePtr vm_engine_ptr = MakeShared<AICpuTFDNNEngine>(attr_aicpu_tf); | |||
if (vm_engine_ptr == nullptr) { | |||
GELOGE(ge::FAILED, "[Register][AiCpuTFEngine]make vm_engine_ptr failed"); | |||
@@ -133,7 +155,13 @@ void RegisterGeLocalEngine() { | |||
std::vector<std::string> mem_type_ge_local; | |||
mem_type_ge_local.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
// GeLocal use minimum priority, set it as 9 | |||
DNNEngineAttribute attr_ge_local = {vm_ge_local, mem_type_ge_local, COST_9, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; | |||
DNNEngineAttribute attr_ge_local = { vm_ge_local, | |||
mem_type_ge_local, | |||
COST_9, | |||
DEVICE, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
true }; | |||
DNNEnginePtr ge_local_engine = MakeShared<GeLocalDNNEngine>(attr_ge_local); | |||
if (ge_local_engine == nullptr) { | |||
GELOGE(ge::FAILED, "[Register][GeLocalEngine] failed, as malloc shared_ptr failed."); | |||
@@ -150,8 +178,13 @@ void RegisterHostCpuEngine() { | |||
std::vector<std::string> mem_type_host_cpu; | |||
mem_type_host_cpu.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
// HostCpu use minimum priority, set it as 10 | |||
DNNEngineAttribute attr_host_cpu = {vm_host_cpu, mem_type_host_cpu, COST_10, | |||
HOST, FORMAT_RESERVED, FORMAT_RESERVED}; | |||
DNNEngineAttribute attr_host_cpu = { vm_host_cpu, | |||
mem_type_host_cpu, | |||
COST_10, | |||
HOST, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
true }; | |||
DNNEnginePtr host_cpu_engine = MakeShared<HostCpuDNNEngine>(attr_host_cpu); | |||
if (host_cpu_engine == nullptr) { | |||
GELOGE(ge::FAILED, "[Register][HostCpuEngine] failed, as malloc shared_ptr failed."); | |||
@@ -167,7 +200,13 @@ void RegisterRtsEngine() { | |||
const std::string vm_rts = "DNN_VM_RTS"; | |||
std::vector<std::string> mem_type_rts; | |||
mem_type_rts.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
DNNEngineAttribute attr_rts = {vm_rts, mem_type_rts, COST_1, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; | |||
DNNEngineAttribute attr_rts = { vm_rts, | |||
mem_type_rts, | |||
COST_1, | |||
DEVICE, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
true }; | |||
DNNEnginePtr rts_engine = MakeShared<RtsDNNEngine>(attr_rts); | |||
if (rts_engine == nullptr) { | |||
GELOGE(ge::FAILED, "[Register][RtsEngine] failed, as malloc shared_ptr failed."); | |||
@@ -183,7 +222,13 @@ void RegisterHcclEngine() { | |||
const std::string dnn_hccl = "DNN_HCCL"; | |||
std::vector<std::string> mem_type_hccl; | |||
mem_type_hccl.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
DNNEngineAttribute attr_hccl = {dnn_hccl, mem_type_hccl, COST_1, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; | |||
DNNEngineAttribute attr_hccl = { dnn_hccl, | |||
mem_type_hccl, | |||
COST_1, | |||
DEVICE, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
true }; | |||
DNNEnginePtr hccl_engine = MakeShared<HcclDNNEngine>(attr_hccl); | |||
if (hccl_engine == nullptr) { | |||
GELOGE(ge::FAILED, "[Register][HcclEngine] failed, as malloc shared_ptr failed."); | |||
@@ -195,6 +240,28 @@ void RegisterHcclEngine() { | |||
} | |||
} | |||
void RegisterFftsPlusEngine() { | |||
const std::string dnn_ffts_plus = "DNN_FFTS_PLUS"; | |||
std::vector<std::string> mem_type_ffts_plus; | |||
mem_type_ffts_plus.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); | |||
DNNEngineAttribute attr_ffts_plus = { dnn_ffts_plus, | |||
mem_type_ffts_plus, | |||
COST_0, | |||
DEVICE, | |||
FORMAT_RESERVED, | |||
FORMAT_RESERVED, | |||
false }; | |||
DNNEnginePtr ffts_plus_engine = MakeShared<FftsPlusDNNEngine>(attr_ffts_plus); | |||
if (ffts_plus_engine == nullptr) { | |||
GELOGE(ge::FAILED, "[Register][FftsPlusDNNEngine] failed, as malloc shared_ptr failed."); | |||
REPORT_INNER_ERROR("E19999", "RegisterFftsPlusEngine failed for new DNNEnginePtr failed."); | |||
return; | |||
} | |||
if (EngineManager::RegisterEngine(dnn_ffts_plus, ffts_plus_engine) != SUCCESS) { | |||
GELOGW("register ffts_plus_engine failed"); | |||
} | |||
} | |||
void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines) { | |||
RegisterAiCoreEngine(); | |||
RegisterVectorEngine(); | |||
@@ -204,6 +271,7 @@ void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines) { | |||
RegisterHostCpuEngine(); | |||
RegisterRtsEngine(); | |||
RegisterHcclEngine(); | |||
RegisterFftsPlusEngine(); | |||
for (auto it = EngineManager::engine_map_->begin(); it != EngineManager::engine_map_->end(); ++it) { | |||
GELOGI("get engine %s from engine plugin.", it->first.c_str()); | |||
@@ -564,6 +564,41 @@ AiCpuBaseTask::~AiCpuBaseTask() { | |||
if (ext_info_addr_dev_ != nullptr) { | |||
(void)rtFree(ext_info_addr_dev_); | |||
} | |||
if (rt_event_ != nullptr) { | |||
(void)rtEventDestroy(rt_event_); | |||
} | |||
} | |||
Status AiCpuBaseTask::UpdateEventIdForBlockingAicpuOp() { | |||
bool is_support = false; | |||
if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed"); | |||
return FAILED; | |||
} | |||
if (!is_support) { | |||
GELOGD("Device not support blocking aicpu op process"); | |||
return SUCCESS; | |||
} | |||
uint32_t event_id = 0; | |||
auto rt_ret = rtEventCreateWithFlag(&rt_event_, RT_EVENT_WITH_FLAG); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
REPORT_CALL_ERROR("E19999", "Call rtEventCreateWithFlag failed, ret:0x%X", rt_ret); | |||
GELOGE(RT_FAILED, "[Call][rtEventCreateWithFlag] failed, ret:0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtGetEventID(rt_event_, &event_id); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
REPORT_CALL_ERROR("E19999", "Call rtGetEventID failed, ret:0x%X", rt_ret); | |||
GELOGE(RT_FAILED, "[Call][rtGetEventID] failed, ret:0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
if (aicpu_ext_handle_->UpdateEventId(event_id) != SUCCESS) { | |||
REPORT_CALL_ERROR("E19999", "Update event id=%u failed.", event_id); | |||
GELOGE(FAILED, "[Update][EventId] Update event id failed", event_id); | |||
return FAILED; | |||
} | |||
GELOGI("Update event_id=%u success", event_id); | |||
return SUCCESS; | |||
} | |||
Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint64_t kernel_id) { | |||
@@ -577,6 +612,9 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint | |||
GELOGD("Get unknown_type is %d.", unknown_shape_type_val); | |||
unknown_type_ = static_cast<UnknowShapeOpType>(unknown_shape_type_val); | |||
AttrUtils::GetBool(op_desc_, ATTR_NAME_IS_BLOCKING_OP, is_blocking_aicpu_op_); | |||
GELOGD("Get op:%s attribute(is_blocking_op), value:%d", op_desc_->GetName().c_str(), is_blocking_aicpu_op_); | |||
aicpu_ext_handle_.reset(new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc_->GetName(), | |||
num_inputs_, | |||
num_outputs_, | |||
@@ -595,6 +633,13 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint | |||
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateSessionInfo(ULLONG_MAX, kernel_id, false), | |||
"[Update][SessionInfo] failed."); | |||
if (is_blocking_aicpu_op_) { | |||
if (UpdateEventIdForBlockingAicpuOp() != SUCCESS) { | |||
GELOGE(FAILED, "[Call][UpdateEventIdForBlockingAicpuOp] Call UpdateEventIdForBlockingAicpuOp failed"); | |||
return FAILED; | |||
} | |||
} | |||
GE_CHK_RT_RET(rtMalloc(&ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), RT_MEMORY_HBM)); | |||
GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), | |||
aicpu_ext_handle_->GetExtInfo(), aicpu_ext_handle_->GetExtInfoLen(), | |||
@@ -770,6 +815,63 @@ Status AiCpuBaseTask::UpdateIoAddr(const vector<DataBuffer> &inputs, const vecto | |||
return SUCCESS; | |||
} | |||
Status AiCpuBaseTask::CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support) { | |||
int32_t device_id = 0; | |||
auto rt_ret = rtGetDevice(&device_id); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret:0x%X", rt_ret); | |||
GELOGE(RT_FAILED, "[Call][rtGetDevice] failed, ret:0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
int32_t value = 0; | |||
rt_ret = rtGetDeviceCapability(device_id, FEATURE_TYPE_BLOCKING_OPERATOR, RT_MODULE_TYPE_AICPU, &value); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
REPORT_CALL_ERROR("E19999", "Call rtGetDeviceCapability failed, ret:0x%X", rt_ret); | |||
GELOGE(RT_FAILED, "[Call][rtGetDeviceCapability] failed, ret:0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
if (value != RT_AICPU_BLOCKING_OP_NOT_SUPPORT && value != RT_AICPU_BLOCKING_OP_SUPPORT) { | |||
REPORT_INNER_ERROR("E19999", "Value should be %d or %d but %d", | |||
RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value); | |||
GELOGE(FAILED, "[Check][Value] Value should be %d or %d but %d", | |||
RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, value); | |||
return FAILED; | |||
} | |||
is_support = (value == RT_AICPU_BLOCKING_OP_SUPPORT ? true : false); | |||
return SUCCESS; | |||
} | |||
Status AiCpuBaseTask::DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream) { | |||
bool is_support = false; | |||
if (CheckDeviceSupportBlockingAicpuOpProcess(is_support) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][CheckDeviceSupportBlockingAicpuOpProcess] Call CheckDeviceSupportBlockingAicpuOpProcess failed"); | |||
return FAILED; | |||
} | |||
if (!is_support) { | |||
GELOGD("Device not support blocking aicpu op process."); | |||
return SUCCESS; | |||
} | |||
GELOGI("Distribute queue task begin"); | |||
if (rt_event_ == nullptr) { | |||
REPORT_INNER_ERROR("E19999", "rt_event_ is nullptr"); | |||
GELOGE(FAILED, "[Check][rt_event_] rt_event_ is nullptr"); | |||
return FAILED; | |||
} | |||
auto rt_ret = rtStreamWaitEvent(stream, rt_event_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X", rt_ret); | |||
GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtEventReset(rt_event_, stream); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X", rt_ret); | |||
GELOGE(RT_FAILED, "[Call][RtApi] failed, ret:0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
return SUCCESS; | |||
} | |||
AiCpuTask::~AiCpuTask() { | |||
FreeHbm(args_); | |||
FreeHbm(io_addr_); | |||
@@ -813,6 +915,14 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { | |||
GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); | |||
GELOGD("Done launch kernel successfully. task = %s", this->op_type_.c_str()); | |||
if (is_blocking_aicpu_op_) { | |||
if (DistributeWaitTaskForAicpuBlockingOp(stream) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); | |||
return FAILED; | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -1089,6 +1199,13 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { | |||
} | |||
GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); | |||
GELOGD("Invoke rtCpuKernelLaunch succeeded"); | |||
if (is_blocking_aicpu_op_) { | |||
if (DistributeWaitTaskForAicpuBlockingOp(stream) != SUCCESS) { | |||
GELOGE(FAILED, "[Call][DistributeWaitTaskForAicpuBlockingOp] Call DistributeWaitTaskForAicpuBlockingOp failed"); | |||
return FAILED; | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -178,6 +178,10 @@ class AiCpuBaseTask : public OpTask { | |||
rtStream_t stream); | |||
Status UpdateOutputShape(vector<GeTensorDesc> &output_desc); | |||
Status UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensorDesc &output_desc); | |||
// for blocking aicpu op | |||
Status DistributeWaitTaskForAicpuBlockingOp(rtStream_t stream); | |||
Status UpdateEventIdForBlockingAicpuOp(); | |||
Status CheckDeviceSupportBlockingAicpuOpProcess(bool &is_support); | |||
protected: | |||
size_t num_inputs_ = 0; | |||
@@ -186,6 +190,9 @@ class AiCpuBaseTask : public OpTask { | |||
std::unique_ptr<ge::hybrid::AicpuExtInfoHandler> aicpu_ext_handle_; | |||
void *ext_info_addr_dev_ = nullptr; | |||
vector<bool> input_is_const_; | |||
// for blocking aicpu op | |||
bool is_blocking_aicpu_op_ = false; | |||
rtEvent_t rt_event_ = nullptr; | |||
}; | |||
class AiCpuTask : public AiCpuBaseTask { | |||
@@ -43,14 +43,21 @@ struct DNNEngineAttribute { | |||
// If engine input format must be specific, set this attribute, else set FORMAT_RESERVED | |||
Format engine_input_format; | |||
Format engine_output_format; | |||
bool atomic_engine_flag; | |||
}; | |||
class GE_FUNC_VISIBILITY DNNEngine { | |||
public: | |||
DNNEngine() = default; | |||
explicit DNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } | |||
virtual ~DNNEngine() = default; | |||
virtual Status Initialize(const std::map<std::string, std::string> &options) = 0; | |||
virtual Status Finalize() = 0; | |||
virtual void GetAttributes(DNNEngineAttribute &attr) const = 0; | |||
Status Initialize(const std::map<std::string, std::string> &options) { return SUCCESS; } | |||
Status Finalize() { return SUCCESS; } | |||
void GetAttributes(DNNEngineAttribute &attr) const { attr = engine_attribute_; } | |||
bool IsAtomic() const { return engine_attribute_.atomic_engine_flag; } | |||
protected: | |||
DNNEngineAttribute engine_attribute_; | |||
}; | |||
} // namespace ge | |||
@@ -1 +1 @@ | |||
Subproject commit a725349b65aef2940555af2ddb7b9461fbe0d5fd | |||
Subproject commit 8f2c4395c346af026c470b47a7c52f2ab5b51f90 |
@@ -1 +1 @@ | |||
Subproject commit 7a2daaa2625505e1a15e1faa46c90df1a23dd6fa | |||
Subproject commit 72d6fcd776ea2eba8000249fd02c8948042e9856 |
@@ -16,12 +16,94 @@ | |||
#include <cce/dnn.h> | |||
#include <securec.h> | |||
#include "runtime_stub.h" | |||
#include "runtime/rt.h" | |||
#define ADD_STUB_RETURN_VALUE(FUNC, TYPE) std::vector<TYPE> g_Stub_##FUNC##_RETURN | |||
#define GET_STUB_RETURN_VALUE(FUNC, TYPE, DEFAULT) ({ \ | |||
TYPE result = DEFAULT; \ | |||
if (!g_Stub_##FUNC##_RETURN.empty()) { \ | |||
result = g_Stub_##FUNC##_RETURN.back(); \ | |||
g_Stub_##FUNC##_RETURN.pop_back(); \ | |||
} \ | |||
result; \ | |||
}) | |||
#define DEL_STUB_RETURN_VALUE(FUNC, TYPE) \ | |||
do { \ | |||
extern std::vector<TYPE> g_Stub_##FUNC##_RETURN; \ | |||
g_Stub_##FUNC##_RETURN.clear(); \ | |||
} while (0) | |||
#define ADD_STUB_OUTBOUND_VALUE(FUNC, TYPE, NAME) std::vector<TYPE> g_Stub_##FUNC##_OUT_##NAME | |||
#define GET_STUB_OUTBOUND_VALUE(FUNC, TYPE, NAME, DEFAULT) ({ \ | |||
TYPE value; \ | |||
if (!g_Stub_##FUNC##_OUT_##NAME.empty()) { \ | |||
value = g_Stub_##FUNC##_OUT_##NAME.back(); \ | |||
g_Stub_##FUNC##_OUT_##NAME.pop_back(); \ | |||
} else { \ | |||
value = DEFAULT; \ | |||
} \ | |||
value; \ | |||
}) | |||
#define DEL_STUB_OUTBOUND_VALUE(FUNC, TYPE, NAME) \ | |||
do { \ | |||
extern std::vector<TYPE> g_Stub_##FUNC##_OUT_##NAME; \ | |||
g_Stub_##FUNC##_OUT_##NAME.clear(); \ | |||
} while (0) | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
#define EVENT_LENTH 10 | |||
void rtStubTearDown() { | |||
DEL_STUB_RETURN_VALUE(rtGetDevice, rtError_t); | |||
DEL_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t); | |||
DEL_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t); | |||
DEL_STUB_RETURN_VALUE(rtEventReset, rtError_t); | |||
DEL_STUB_RETURN_VALUE(rtEventCreate, rtError_t); | |||
DEL_STUB_RETURN_VALUE(rtGetEventID, rtError_t); | |||
} | |||
ADD_STUB_RETURN_VALUE(rtGetDevice, rtError_t); | |||
rtError_t rtGetDevice(int32_t *device) { | |||
return GET_STUB_RETURN_VALUE(rtGetDevice, rtError_t, RT_ERROR_NONE); | |||
} | |||
ADD_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t); | |||
ADD_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value); | |||
rtError_t rtGetDeviceCapability(int32_t device, int32_t moduleType, int32_t featureType, int32_t *value) { | |||
*value = GET_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT); | |||
return GET_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
} | |||
ADD_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t); | |||
rtError_t rtStreamWaitEvent(rtStream_t stream, rtEvent_t event) { | |||
return GET_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, RT_ERROR_NONE); | |||
} | |||
ADD_STUB_RETURN_VALUE(rtEventReset, rtError_t); | |||
rtError_t rtEventReset(rtEvent_t event, rtStream_t stream) { | |||
return GET_STUB_RETURN_VALUE(rtEventReset, rtError_t, RT_ERROR_NONE); | |||
} | |||
ADD_STUB_RETURN_VALUE(rtEventCreate, rtError_t); | |||
rtError_t rtEventCreate(rtEvent_t *event) { | |||
*event = new int[EVENT_LENTH]; | |||
return GET_STUB_RETURN_VALUE(rtEventCreate, rtError_t, RT_ERROR_NONE); | |||
} | |||
ADD_STUB_RETURN_VALUE(rtGetEventID, rtError_t); | |||
rtError_t rtGetEventID(rtEvent_t event, uint32_t *event_id) { | |||
*event_id = 0; | |||
return GET_STUB_RETURN_VALUE(rtEventCreate, rtError_t, RT_ERROR_NONE); | |||
} | |||
rtError_t rtCtxSetCurrent(rtContext_t ctx) { return RT_ERROR_NONE; } | |||
rtError_t rtGetStreamId(rtStream_t stream, int32_t *stream_id) { | |||
@@ -42,11 +124,6 @@ rtError_t rtEventGetTimeStamp(uint64_t *time, rtEvent_t event) { | |||
return RT_ERROR_NONE; | |||
} | |||
rtError_t rtEventCreate(rtEvent_t *event) { | |||
*event = new int[EVENT_LENTH]; | |||
return RT_ERROR_NONE; | |||
} | |||
rtError_t rtEventCreateWithFlag(rtEvent_t *event, uint32_t flag) { | |||
return rtEventCreate(event); | |||
} | |||
@@ -112,8 +189,6 @@ rtError_t rtMemcpyAsync(void *dst, uint64_t dest_max, const void *src, uint64_t | |||
return RT_ERROR_NONE; | |||
} | |||
rtError_t rtStreamWaitEvent(rtStream_t stream, rtEvent_t event) { return RT_ERROR_NONE; } | |||
rtError_t rtSetTSDevice(uint32_t tsId) { | |||
return RT_ERROR_NONE; | |||
} | |||
@@ -347,10 +422,6 @@ rtError_t rtStreamSwitchEx(void *ptr, rtCondition_t condition, void *value_ptr, | |||
rtError_t rtStreamActive(rtStream_t active_stream, rtStream_t stream) { return RT_ERROR_NONE; } | |||
rtError_t rtEventReset(rtEvent_t event, rtStream_t stream) { return RT_ERROR_NONE; } | |||
rtError_t rtGetDevice(int32_t *device) { return RT_ERROR_NONE; } | |||
rtError_t rtDatadumpInfoLoad(const void *dump_info, uint32_t length) { return RT_ERROR_NONE; } | |||
rtError_t rtKernelLaunchWithFlag(const void *stub_func, uint32_t block_dim, void *args, uint32_t args_size, | |||
@@ -467,6 +538,14 @@ rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream) { | |||
return RT_ERROR_NONE; | |||
} | |||
rtError_t rtGetAddrAndPrefCntWithHandle(void *handle, const void *devFunc, void **addr, uint32_t *prefetchCnt) { | |||
return RT_ERROR_NONE; | |||
} | |||
rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream) { | |||
return RT_ERROR_NONE; | |||
} | |||
rtError_t rtKernelLaunchFwk(const char *opName, void *args, uint32_t argSize, uint32_t flags, rtStream_t rtStream) { | |||
return RT_ERROR_NONE; | |||
} | |||
@@ -0,0 +1,70 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef __INC_LLT_RUNTIME_STUB_H | |||
#define __INC_LLT_RUNTIME_STUB_H | |||
#include <vector> | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
void rtStubTearDown(); | |||
#define RTS_STUB_SETUP() \ | |||
do { \ | |||
rtStubTearDown(); \ | |||
} while (0) | |||
#define RTS_STUB_TEARDOWN() \ | |||
do { \ | |||
rtStubTearDown(); \ | |||
} while (0) | |||
#define RTS_STUB_RETURN_VALUE(FUNC, TYPE, VALUE) \ | |||
do { \ | |||
g_Stub_##FUNC##_RETURN.emplace(g_Stub_##FUNC##_RETURN.begin(), VALUE); \ | |||
} while (0) | |||
#define RTS_STUB_OUTBOUND_VALUE(FUNC, TYPE, NAME, VALUE) \ | |||
do { \ | |||
g_Stub_##FUNC##_OUT_##NAME.emplace(g_Stub_##FUNC##_OUT_##NAME.begin(), VALUE); \ | |||
} while (0) | |||
#define RTS_STUB_RETURN_EXTERN(FUNC, TYPE) extern std::vector<TYPE> g_Stub_##FUNC##_RETURN; | |||
#define RTS_STUB_OUTBOUND_EXTERN(FUNC, TYPE, NAME) extern std::vector<TYPE> g_Stub_##FUNC##_OUT_##NAME; | |||
RTS_STUB_RETURN_EXTERN(rtGetDevice, rtError_t); | |||
RTS_STUB_OUTBOUND_EXTERN(rtGetDevice, int32_t, device) | |||
RTS_STUB_RETURN_EXTERN(rtGetDeviceCapability, rtError_t); | |||
RTS_STUB_OUTBOUND_EXTERN(rtGetDeviceCapability, int32_t, value); | |||
RTS_STUB_RETURN_EXTERN(rtStreamWaitEvent, rtError_t); | |||
RTS_STUB_RETURN_EXTERN(rtEventReset, rtError_t); | |||
RTS_STUB_RETURN_EXTERN(rtEventCreate, rtError_t); | |||
RTS_STUB_OUTBOUND_EXTERN(rtEventCreate, rtEvent_t, event); | |||
RTS_STUB_RETURN_EXTERN(rtGetEventID, rtError_t); | |||
RTS_STUB_OUTBOUND_EXTERN(rtEventCreate, uint32_t, event_id); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // __INC_LLT_RUNTIME_STUB_H |
@@ -45,7 +45,7 @@ file(GLOB_RECURSE METADEF_REGISTER_SRCS CONFIGURE_DEPENDS | |||
"${GE_CODE_DIR}/metadef/register/*.cpp" | |||
) | |||
file(GLOB_RECURSE PARSER_SRCS CONFIGURE_DEPENDS | |||
file(GLOB_RECURSE PARSER_SRCS CONFIGURE_DEPENDS | |||
"${GE_CODE_DIR}/parser/parser/common/*.cc" | |||
) | |||
@@ -114,7 +114,6 @@ list(APPEND INCLUDE_DIRECTORIES | |||
list(APPEND STUB_LIBS | |||
c_sec | |||
slog_stub | |||
cce_ge_stub | |||
runtime_stub | |||
profiler_stub | |||
hccl_stub | |||
@@ -226,7 +225,7 @@ add_custom_command( | |||
add_library(graphengine STATIC ${PARSER_SRCS} ${GE_SRCS}) | |||
target_include_directories(graphengine | |||
PUBLIC | |||
PUBLIC | |||
"${INCLUDE_DIRECTORIES}" | |||
"${GE_CODE_DIR}/ge/host_cpu_engine" | |||
) | |||
@@ -16,7 +16,6 @@ | |||
#include "ge_graph_dsl/op_desc/op_desc_cfg_repo.h" | |||
#include "framework/common/types.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "ge_graph_dsl/op_desc/op_desc_cfg.h" | |||
GE_NS_BEGIN | |||
@@ -39,6 +38,8 @@ static std::map<OpType, OpDescCfg> cfg_repo{OP_CFG(DATA, 1, 1, FORMAT_NCHW, DT_F | |||
OP_CFG(EXIT, 1, 1, FORMAT_NCHW, DT_FLOAT, {1, 1, 224, 224}), | |||
OP_CFG(NEXTITERATION, 1, 1, FORMAT_NCHW, DT_FLOAT, {1, 1, 224, 224}), | |||
OP_CFG(NETOUTPUT, 2, 2, FORMAT_NCHW, DT_FLOAT, {1, 1, 224, 224}), | |||
OP_CFG(CONSTANTOP, 0, 1, FORMAT_NCHW, DT_FLOAT, {1, 1, 224, 224}), | |||
OP_CFG(GETNEXT, 0, 1, FORMAT_NCHW, DT_FLOAT, {1, 1, 224, 224}), | |||
OP_CFG(VARIABLE, 1, 1)}; | |||
} // namespace | |||
@@ -15,7 +15,6 @@ | |||
*/ | |||
#include "framework/common/types.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "ge_graph_dsl/ge.h" | |||
GE_NS_BEGIN | |||
@@ -32,9 +31,10 @@ REGISTER_OPTYPE_DEFINE(ADD, "Add"); | |||
REGISTER_OPTYPE_DEFINE(WHILE, "While"); | |||
REGISTER_OPTYPE_DEFINE(ENTER, "Enter"); | |||
REGISTER_OPTYPE_DEFINE(MERGE, "Merge"); | |||
REGISTER_OPTYPE_DEFINE(LOOPCOND, "Loopcond"); | |||
REGISTER_OPTYPE_DEFINE(LOOPCOND, "LoopCond"); | |||
REGISTER_OPTYPE_DEFINE(SWITCH, "Switch"); | |||
REGISTER_OPTYPE_DEFINE(EXIT, "Exit"); | |||
REGISTER_OPTYPE_DEFINE(NEXTITERATION, "Nextiteration"); | |||
REGISTER_OPTYPE_DEFINE(NEXTITERATION, "NextIteration"); | |||
REGISTER_OPTYPE_DEFINE(GETNEXT, "GetNext"); | |||
GE_NS_END |
@@ -20,6 +20,7 @@ | |||
#include "fake_ns.h" | |||
#include "opskernel_manager/ops_kernel_manager.h" | |||
#include "register/ops_kernel_builder_registry.h" | |||
#include "plugin/engine/engine_manage.h" | |||
FAKE_NS_BEGIN | |||
@@ -27,6 +28,9 @@ struct EnvInstaller { | |||
virtual void InstallTo(std::map<string, OpsKernelInfoStorePtr>&) const {} | |||
virtual void InstallTo(std::map<string, GraphOptimizerPtr>&) const {} | |||
virtual void InstallTo(std::map<string, OpsKernelBuilderPtr>&) const {} | |||
virtual void InstallTo(std::map<string, std::set<std::string>>&) const {} | |||
virtual void InstallTo(std::map<string, std::string>&) const {} | |||
virtual void InstallTo(std::map<string, DNNEnginePtr>&) const {} | |||
virtual void Install() const {} | |||
}; | |||
@@ -0,0 +1,40 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef INC_4DCD71AA72F8492D8594C49094B92528 | |||
#define INC_4DCD71AA72F8492D8594C49094B92528 | |||
#include "ge_running_env/fake_ns.h" | |||
#include "common/optimizer/graph_optimizer.h" | |||
FAKE_NS_BEGIN | |||
// Test stub for a single-engine ("atomic") graph optimizer.  Every optimize
// hook is a no-op that reports SUCCESS (see fake_atomic_optimizer.cc); only
// GetAttributes carries real data: the engine name given at construction.
struct FakeAtomicOptimizer : GraphOptimizer {
  // @param engine_name  name reported back through GetAttributes().
  explicit FakeAtomicOptimizer(const std::string &engine_name) : engine_name_(engine_name) {}

 private:
  Status Initialize(const map<string, string> &options) override;
  Status Finalize() override;
  Status OptimizeOriginalGraph(ComputeGraph &graph) override;
  Status OptimizeFusedGraph(ComputeGraph &graph) override;
  Status OptimizeWholeGraph(ComputeGraph &graph) override;
  Status GetAttributes(GraphOptimizerAttribute &attrs) const override;

 protected:
  // Protected so subclasses (e.g. FakeCompoundOptimizer) can report it too.
  std::string engine_name_;
};
FAKE_NS_END | |||
#endif |
@@ -0,0 +1,40 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef INC_897A92FE9414452E8912FC7204E018A8 | |||
#define INC_897A92FE9414452E8912FC7204E018A8 | |||
#include "ge_running_env/fake_ns.h" | |||
#include "ge_running_env/fake_engine.h" | |||
#include "common/optimizer/graph_optimizer.h" | |||
FAKE_NS_BEGIN | |||
// Test stub for a compound engine: a named engine composed of a set of
// sub-engine names.  Overrides the EnvInstaller hooks to register its
// optimizer, containment map, kernel-lib-name mapping and DNNEngine entry
// (see fake_compound_engine.cc).
struct FakeCompoundEngine : FakeEngine {
  // @param name         engine name used as the key in every installed map.
  // @param sub_engines  names of the atomic engines this compound engine wraps.
  FakeCompoundEngine(const std::string &name, const std::set<std::string> &sub_engines) : FakeEngine(name),
      sub_engines_(sub_engines) {}

 private:
  void InstallTo(std::map<std::string, GraphOptimizerPtr>&) const override;
  void InstallTo(std::map<std::string, OpsKernelInfoStorePtr>&) const override;
  void InstallTo(std::map<std::string, std::set<std::string>>&) const override;
  void InstallTo(std::map<std::string, std::string>&) const override;
  void InstallTo(std::map<std::string, DNNEnginePtr>&) const override;

 private:
  std::set<std::string> sub_engines_;
};
FAKE_NS_END | |||
#endif |
@@ -0,0 +1,34 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef INC_8E85C90AF30E4DBF9EF50467846EDA88 | |||
#define INC_8E85C90AF30E4DBF9EF50467846EDA88 | |||
#include "ge_running_env/fake_ns.h" | |||
#include "ge_running_env/fake_atomic_optimizer.h" | |||
FAKE_NS_BEGIN | |||
// Optimizer stub for a compound (e.g. FFTS+) engine: its OptimizeFusedGraph
// wraps the graph's compute nodes into a PartitionedCall subgraph and tags
// them with a thread scope id (see fake_compound_optimizer.cc).
struct FakeCompoundOptimizer : FakeAtomicOptimizer {
 public:
  explicit FakeCompoundOptimizer(const std::string &engine_name) : FakeAtomicOptimizer(engine_name) {}

 private:
  Status OptimizeFusedGraph(ComputeGraph &graph) override;
  // Counter shared by ALL instances; incremented once per fused graph.
  // NOTE(review): not thread-safe — presumably test code runs single-threaded;
  // confirm before reusing in concurrent tests.
  static uint32_t thread_scope_id_;
};
FAKE_NS_END | |||
#endif |
@@ -39,14 +39,16 @@ struct FakeEngine : EnvInstaller { | |||
private: | |||
void InstallTo(std::map<string, OpsKernelInfoStorePtr>&) const override; | |||
void InstallTo(std::map<string, OpsKernelBuilderPtr>&) const override; | |||
void InstallTo(std::map<std::string, DNNEnginePtr>&) const override; | |||
private: | |||
template <typename BasePtr, typename SubClass> | |||
void InstallFor(std::map<string, BasePtr>& maps, const std::map<std::string, std::shared_ptr<SubClass>>&) const; | |||
private: | |||
protected: | |||
std::string engine_name_; | |||
std::set<std::string> info_store_names_; | |||
private: | |||
std::map<std::string, FakeOpsKernelBuilderPtr> custom_builders_; | |||
std::map<std::string, FakeOpsKernelInfoStorePtr> custom_info_stores_; | |||
}; | |||
@@ -38,6 +38,9 @@ struct GeRunningEnvFaker { | |||
std::map<string, OpsKernelInfoStorePtr> &ops_kernel_info_stores_; | |||
std::map<string, GraphOptimizerPtr> &ops_kernel_optimizers_; | |||
std::map<string, OpsKernelBuilderPtr> &ops_kernel_builders_; | |||
std::map<string, std::set<std::string>> &compound_engines_contains_; | |||
std::map<string, std::string> &compound_engine_2_kernel_lib_name_; | |||
std::map<std::string, DNNEnginePtr> &engine_map_; | |||
}; | |||
FAKE_NS_END | |||
@@ -0,0 +1,46 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "ge_running_env/fake_atomic_optimizer.h" | |||
FAKE_NS_BEGIN | |||
// Stub initialization: accepts any options, performs no work.
// @param options  unused configuration map.
// @return SUCCESS always.
Status FakeAtomicOptimizer::Initialize(const map<string, string> &options) {
  return SUCCESS;
}
// (Fixed: removed the stray ';' after the function body — an empty
// declaration that trips -pedantic builds and matches no sibling definition.)
// Stub teardown: nothing to release; always reports SUCCESS.
Status FakeAtomicOptimizer::Finalize() {
  return SUCCESS;
}
// Stub pass: leaves the original graph untouched; always reports SUCCESS.
Status FakeAtomicOptimizer::OptimizeOriginalGraph(ComputeGraph &graph) {
  return SUCCESS;
}
// Stub pass: leaves the fused graph untouched; always reports SUCCESS.
// (FakeCompoundOptimizer overrides this with real subgraph-building logic.)
Status FakeAtomicOptimizer::OptimizeFusedGraph(ComputeGraph& graph) {
  return SUCCESS;
}
// Stub pass: leaves the whole graph untouched; always reports SUCCESS.
Status FakeAtomicOptimizer::OptimizeWholeGraph(ComputeGraph &graph) {
  return SUCCESS;
}
// Reports this optimizer's identity; only the engine name is populated,
// all other attribute fields keep their defaults.
Status FakeAtomicOptimizer::GetAttributes(GraphOptimizerAttribute &attrs) const {
  attrs.engineName = engine_name_;
  return SUCCESS;
}
FAKE_NS_END |
@@ -0,0 +1,48 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "ge_running_env/fake_compound_engine.h" | |||
#include "ge_running_env/fake_compound_optimizer.h" | |||
FAKE_NS_BEGIN | |||
void FakeCompoundEngine::InstallTo(std::map<std::string, GraphOptimizerPtr> &graph_optimizers) const { | |||
auto optimizer = std::make_shared<FakeCompoundOptimizer>(engine_name_); | |||
graph_optimizers[engine_name_] = optimizer; | |||
} | |||
// Intentionally a no-op: this override installs nothing into the
// ops-kernel-info-store map.
void FakeCompoundEngine::InstallTo(std::map<std::string, OpsKernelInfoStorePtr>&) const {
}
// Publishes the set of sub-engine names this compound engine contains,
// keyed by the compound engine's own name (overwrites any existing entry).
void FakeCompoundEngine::InstallTo(std::map<std::string, std::set<std::string>> &compound_engine_contains) const {
  compound_engine_contains[engine_name_] = sub_engines_;
}
// Maps this engine's name to its kernel lib name — but only when exactly one
// info store is configured; with zero or several stores the mapping would be
// ambiguous, so nothing is installed.
void FakeCompoundEngine::InstallTo(std::map<std::string, std::string> &compound_engine_2_kernel_lib_name) const {
  if (info_store_names_.size() == 1) {
    compound_engine_2_kernel_lib_name[engine_name_] = *info_store_names_.begin();
  }
}
void FakeCompoundEngine::InstallTo(std::map<string, DNNEnginePtr> &engines) const { | |||
DNNEngineAttribute attr; | |||
attr.engine_name = engine_name_; | |||
attr.atomic_engine_flag = false; | |||
engines[engine_name_] = MakeShared<DNNEngine>(attr); | |||
} | |||
FAKE_NS_END |
@@ -0,0 +1,61 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "ge_running_env/fake_compound_optimizer.h" | |||
#include "graph/utils/node_utils.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/utils/op_desc_utils.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "framework/common/types.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "framework/common/util.h" | |||
FAKE_NS_BEGIN | |||
uint32_t FakeCompoundOptimizer::thread_scope_id_ = 0; | |||
// Wraps the graph's compute nodes into one "PartitionedCall_<id>" subgraph,
// marks the parent node with ATTR_NAME_FFTS_PLUS_SUB_GRAPH and tags every
// subgraph node with the current thread scope id.
// @param graph  graph to partition; PLACEHOLDER/END nodes are left outside.
// @return SUCCESS when there is nothing to fuse or the subgraph was built;
//         FAILED if BuildSubgraphWithNodes returns null.
Status FakeCompoundOptimizer::OptimizeFusedGraph(ComputeGraph& graph) {
  std::set<NodePtr> nodes;
  for (const auto &node : graph.GetDirectNode()) {
    const auto &type = NodeUtils::GetNodeType(node);
    if ((type != PLACEHOLDER) && (type != END)) {
      nodes.emplace(node);
    }
  }
  // Fix: also bail out on an EMPTY node set (original checked only == 1);
  // building a subgraph from no nodes would fail and return a spurious error.
  if (nodes.size() <= 1) {
    return SUCCESS;
  }
  // Plain value instead of a const-ref bound to a temporary.
  const std::string subgraph_name = "PartitionedCall_" + std::to_string(thread_scope_id_);
  const auto &subgraph = GraphUtils::BuildSubgraphWithNodes(graph, nodes, subgraph_name);
  if (subgraph == nullptr) {
    GELOGE(FAILED, "Build subgraph %s failed", subgraph_name.c_str());
    return FAILED;
  }
  const auto &parent_node = subgraph->GetParentNode();
  GE_CHECK_NOTNULL(parent_node);
  (void)AttrUtils::SetStr(parent_node->GetOpDesc(), ATTR_NAME_FFTS_PLUS_SUB_GRAPH, subgraph_name);
  // Every node inside the new subgraph shares the same thread scope id.
  for (const auto &node : subgraph->GetAllNodes()) {
    (void)AttrUtils::SetInt(node->GetOpDesc(), ATTR_NAME_THREAD_SCOPE_ID, thread_scope_id_);
  }
  thread_scope_id_++;
  return SUCCESS;
}
FAKE_NS_END |
@@ -15,9 +15,6 @@ | |||
*/ | |||
#include "ge_running_env/fake_engine.h" | |||
#include "ge_running_env/fake_ops_kernel_builder.h" | |||
#include "ge_running_env/fake_ops_kernel_info_store.h" | |||
#include "opskernel_manager/ops_kernel_manager.h" | |||
FAKE_NS_BEGIN | |||
@@ -78,4 +75,11 @@ void FakeEngine::InstallTo(std::map<string, OpsKernelBuilderPtr> &ops_kernel_bui | |||
InstallFor<OpsKernelBuilderPtr, FakeOpsKernelBuilder>(ops_kernel_builders, custom_builders_); | |||
} | |||
void FakeEngine::InstallTo(std::map<string, DNNEnginePtr> &engines) const { | |||
DNNEngineAttribute attr; | |||
attr.engine_name = engine_name_; | |||
attr.atomic_engine_flag = true; | |||
engines[engine_name_] = MakeShared<DNNEngine>(attr); | |||
} | |||
FAKE_NS_END |
@@ -32,6 +32,8 @@ std::vector<FakeOp> fake_ops = { | |||
FakeOp(SWITCH).InfoStoreAndBuilder("RTSLib"), FakeOp(LOOPCOND).InfoStoreAndBuilder("RTSLib"), | |||
FakeOp(STREAMMERGE).InfoStoreAndBuilder("RTSLib"), FakeOp(STREAMSWITCH).InfoStoreAndBuilder("RTSLib"), | |||
FakeOp(STREAMACTIVE).InfoStoreAndBuilder("RTSLib"), FakeOp(EXIT).InfoStoreAndBuilder("RTSLib"), | |||
FakeOp(SEND).InfoStoreAndBuilder("RTSLib"), FakeOp(RECV).InfoStoreAndBuilder("RTSLib"), | |||
FakeOp(IDENTITY).InfoStoreAndBuilder("RTSLib"), FakeOp(IDENTITYN).InfoStoreAndBuilder("RTSLib"), | |||
FakeOp(LESS).InfoStoreAndBuilder("AiCoreLib"), FakeOp(NEXTITERATION).InfoStoreAndBuilder("AiCoreLib"), | |||
FakeOp(CAST).InfoStoreAndBuilder("AiCoreLib"), FakeOp(TRANSDATA).InfoStoreAndBuilder("AiCoreLib"), | |||
@@ -53,4 +55,4 @@ void GeDefaultRunningEnv::InstallTo(GeRunningEnvFaker& ge_env) { | |||
} | |||
} | |||
FAKE_NS_END | |||
FAKE_NS_END |
@@ -15,34 +15,41 @@ | |||
*/ | |||
#include <map> | |||
#include <algorithm> | |||
#include "external/ge/ge_api.h" | |||
#include "opskernel_manager/ops_kernel_builder_manager.h" | |||
#include "init/gelib.h" | |||
#include "utility" | |||
#include "plugin/engine/engine_manage.h" | |||
#include "ge_running_env/ge_running_env_faker.h" | |||
#include "ge_default_running_env.h" | |||
#include "ge_running_env/env_installer.h" | |||
#include "op/fake_op_repo.h" | |||
FAKE_NS_BEGIN | |||
namespace { | |||
OpsKernelManager& getKernelManger() { | |||
OpsKernelManager& getKernelManager() { | |||
std::shared_ptr<GELib> instancePtr = ge::GELib::GetInstance(); | |||
return instancePtr->OpsKernelManagerObj(); | |||
} | |||
DNNEngineManager& getDNNEngindManager() { | |||
std::shared_ptr<GELib> instancePtr = ge::GELib::GetInstance(); | |||
return instancePtr->DNNEngineManagerObj(); | |||
} | |||
struct InitEnv { | |||
static InitEnv& GetInstance() { | |||
static InitEnv instance; | |||
return instance; | |||
} | |||
void reset(std::map<string, OpsKernelInfoStorePtr>& ops_kernel_info_stores, | |||
std::map<string, OpsKernelBuilderPtr>& builders) { | |||
void reset(std::map<string, OpsKernelInfoStorePtr> &ops_kernel_info_stores, | |||
std::map<string, OpsKernelBuilderPtr> &builders, | |||
std::map<string, GraphOptimizerPtr> &ops_kernel_optimizers, | |||
std::map<string, std::set<std::string>> &compound_engines_contains, | |||
std::map<string, std::string> &compound_engine_2_kernel_lib_name, | |||
std::map<string, DNNEnginePtr> &engines) { | |||
std::set<string> remove_info_names; | |||
for (auto iter : ops_kernel_info_stores) { | |||
for (auto iter : builders) { | |||
if (kernel_info_names.find(iter.first) == kernel_info_names.end()) { | |||
remove_info_names.insert(iter.first); | |||
} | |||
@@ -50,12 +57,16 @@ struct InitEnv { | |||
for (auto info_name : remove_info_names) { | |||
ops_kernel_info_stores.erase(info_name); | |||
builders.erase(info_name); | |||
ops_kernel_optimizers.erase(info_name); | |||
compound_engines_contains.erase(info_name); | |||
compound_engine_2_kernel_lib_name.erase(info_name); | |||
engines.erase(info_name); | |||
} | |||
} | |||
private: | |||
InitEnv() { | |||
for (auto iter : getKernelManger().GetAllOpsKernelInfoStores()) { | |||
for (auto iter : getKernelManager().GetAllOpsKernelInfoStores()) { | |||
kernel_info_names.insert(iter.first); | |||
} | |||
} | |||
@@ -66,20 +77,26 @@ struct InitEnv { | |||
} // namespace | |||
GeRunningEnvFaker::GeRunningEnvFaker() | |||
: op_kernel_info_(const_cast<std::map<string, vector<OpInfo>>&>(getKernelManger().GetAllOpsKernelInfo())), | |||
: op_kernel_info_(const_cast<std::map<std::string, vector<OpInfo>>&>(getKernelManager().GetAllOpsKernelInfo())), | |||
ops_kernel_info_stores_( | |||
const_cast<std::map<string, OpsKernelInfoStorePtr>&>(getKernelManger().GetAllOpsKernelInfoStores())), | |||
const_cast<std::map<std::string, OpsKernelInfoStorePtr>&>(getKernelManager().GetAllOpsKernelInfoStores())), | |||
ops_kernel_optimizers_( | |||
const_cast<std::map<string, GraphOptimizerPtr>&>(getKernelManger().GetAllGraphOptimizerObjs())), | |||
ops_kernel_builders_(const_cast<std::map<string, OpsKernelBuilderPtr>&>( | |||
OpsKernelBuilderManager::Instance().GetAllOpsKernelBuilders())) { | |||
const_cast<std::map<std::string, GraphOptimizerPtr>&>(getKernelManager().GetAllGraphOptimizerObjs())), | |||
ops_kernel_builders_(const_cast<std::map<std::string, OpsKernelBuilderPtr>&>( | |||
OpsKernelBuilderManager::Instance().GetAllOpsKernelBuilders())), | |||
compound_engines_contains_( | |||
const_cast<std::map<std::string, std::set<std::string>>&>(getKernelManager().GetCompoundEngineContains())), | |||
compound_engine_2_kernel_lib_name_( | |||
const_cast<std::map<std::string, std::string>&>(getKernelManager().GetCompoundEngineKernelLibName())), | |||
engine_map_(const_cast<std::map<std::string, DNNEnginePtr>&>(getDNNEngindManager().GetAllEngines())) { | |||
Reset(); | |||
} | |||
GeRunningEnvFaker& GeRunningEnvFaker::Reset() { | |||
InitEnv& init_env = InitEnv::GetInstance(); | |||
FakeOpRepo::Reset(); | |||
init_env.reset(ops_kernel_info_stores_, ops_kernel_builders_); | |||
init_env.reset(ops_kernel_info_stores_, ops_kernel_builders_, ops_kernel_optimizers_, | |||
compound_engines_contains_, compound_engine_2_kernel_lib_name_, engine_map_); | |||
flush(); | |||
return *this; | |||
} | |||
@@ -91,13 +108,17 @@ GeRunningEnvFaker& GeRunningEnvFaker::Install(const EnvInstaller& installer) { | |||
installer.InstallTo(ops_kernel_info_stores_); | |||
installer.InstallTo(ops_kernel_optimizers_); | |||
installer.InstallTo(ops_kernel_builders_); | |||
installer.InstallTo(compound_engines_contains_); | |||
installer.InstallTo(compound_engine_2_kernel_lib_name_); | |||
installer.InstallTo(engine_map_); | |||
flush(); | |||
return *this; | |||
} | |||
void GeRunningEnvFaker::flush() { | |||
op_kernel_info_.clear(); | |||
getKernelManger().GetOpsKernelInfo(""); | |||
getKernelManager().GetOpsKernelInfo(""); | |||
} | |||
GeRunningEnvFaker& GeRunningEnvFaker::InstallDefault() { | |||
@@ -20,9 +20,10 @@ | |||
#include "external/ge/ge_api.h" | |||
#include "opskernel_manager/ops_kernel_builder_manager.h" | |||
#include "ge_running_env/fake_ops_kernel_builder.h" | |||
#include "ge_running_env/fake_ns.h" | |||
#include "ge_running_env/ge_running_env_faker.h" | |||
#include "ge_running_env/fake_op.h" | |||
#include "ge_running_env/fake_compound_engine.h" | |||
FAKE_NS_BEGIN | |||
#define ASSERT_OPS_LIST_SIZE(list_size) \ | |||
@@ -35,6 +36,7 @@ class GeRunningEvnFakerTest : public testing::Test { | |||
void SetUp() {} | |||
OpsKernelManager &kernel_manager = ge::GELib::GetInstance()->OpsKernelManagerObj(); | |||
OpsKernelBuilderManager &builder_manager = OpsKernelBuilderManager::Instance(); | |||
DNNEngineManager &dnnengine_manager = ge::GELib::GetInstance()->DNNEngineManagerObj(); | |||
}; | |||
TEST_F(GeRunningEvnFakerTest, test_reset_running_env_is_success) { | |||
@@ -142,7 +144,31 @@ TEST_F(GeRunningEvnFakerTest, test_install_default_fake_engine_success) { | |||
ASSERT_EQ(kernel_manager.GetAllOpsKernelInfoStores().size(), 7); | |||
ASSERT_EQ(builder_manager.GetAllOpsKernelBuilders().size(), 7); | |||
ASSERT_EQ(kernel_manager.GetAllOpsKernelInfo().size(), 66); | |||
ASSERT_EQ(kernel_manager.GetAllOpsKernelInfo().size(), 68); | |||
} | |||
TEST_F(GeRunningEvnFakerTest, test_install_fake_engine_with_optimizer_success) { | |||
GeRunningEnvFaker ge_env; | |||
ge_env.Install(FakeEngine("DNN_VM_AICPU")); | |||
ASSERT_EQ(kernel_manager.GetAllOpsKernelInfoStores().size(), 2); | |||
ASSERT_EQ(kernel_manager.GetAllGraphOptimizerObjs().size(), 0); | |||
ASSERT_EQ(builder_manager.GetAllOpsKernelBuilders().size(), 2); | |||
} | |||
TEST_F(GeRunningEvnFakerTest, test_install_fake_engine_with_sub_engines_success) { | |||
GeRunningEnvFaker ge_env; | |||
ge_env.Install(FakeEngine("DNN_VM_AICPU")) | |||
.Install(FakeEngine("AIcoreEngine")) | |||
.Install(FakeCompoundEngine("FFTS+", {"DNN_VM_AICPU", "AIcoreEngine"}).KernelInfoStore("FFTS+")); | |||
ASSERT_EQ(kernel_manager.GetAllOpsKernelInfoStores().size(), 3); | |||
ASSERT_EQ(kernel_manager.GetAllGraphOptimizerObjs().size(), 1); | |||
ASSERT_EQ(builder_manager.GetAllOpsKernelBuilders().size(), 4); | |||
ASSERT_EQ(kernel_manager.GetCompoundEngineContains().size(), 1); | |||
ASSERT_EQ(ge::GELib::GetInstance()->OpsKernelManagerObj().GetCompoundEngineContains().size(), 1); | |||
ASSERT_EQ(ge::GELib::GetInstance()->OpsKernelManagerObj().GetCompoundEngineKernelLibName().size(), 1); | |||
} | |||
FAKE_NS_END |
@@ -0,0 +1,151 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include <gtest/gtest.h> | |||
#include "init/gelib.h" | |||
#include "opskernel_manager/ops_kernel_builder_manager.h" | |||
#include "external/ge/ge_api.h" | |||
#include "ge_running_env/ge_running_env_faker.h" | |||
#include "ge_graph_dsl/graph_dsl.h" | |||
#include "ge_running_env/fake_compound_engine.h" | |||
#include "ge_running_env/fake_op.h" | |||
#include "easy_graph/layout/graph_layout.h" | |||
#include "easy_graph/layout/engines/graph_easy/graph_easy_option.h" | |||
#include "easy_graph/layout/engines/graph_easy/graph_easy_executor.h" | |||
#include "ge_graph_dsl/assert/graph_assert.h" | |||
using namespace std; | |||
using namespace ge; | |||
namespace { | |||
bool IfNodeExist(const ComputeGraphPtr &graph, std::function<bool(const NodePtr &)> filter, bool direct_node_flag = true) { | |||
for (const auto &node : graph->GetNodes(direct_node_flag)) { | |||
if (filter(node)) { | |||
return true; | |||
} | |||
} | |||
return false; | |||
} | |||
void GetSubgraphsWithFilter(const ComputeGraphPtr &graph, std::function<bool(const ComputeGraphPtr &)> filter, | |||
std::vector<ComputeGraphPtr> &subgraphs) { | |||
for (const auto &subgraph : graph->GetAllSubgraphs()) { | |||
if (filter(subgraph)) { | |||
subgraphs.emplace_back(subgraph); | |||
} | |||
} | |||
} | |||
bool IsAllNodeMatch(const ComputeGraphPtr &graph, std::function<bool(const NodePtr &)> filter) { | |||
for (const auto &node : graph->GetAllNodes()) { | |||
if (!filter(node)) { | |||
return false; | |||
} | |||
} | |||
return true; | |||
} | |||
} | |||
// Fixture for FFTS+ compound-engine tests: installs the default fake running
// environment plus an "FFTS+" compound engine wrapping AIcoreEngine and
// DNN_VM_AICPU, and fake GETNEXT/HCOMREDUCE ops.
class TestFftsPlus : public testing::Test {
 protected:
  GeRunningEnvFaker ge_env;
  EG_NS::GraphEasyExecutor executor;

  void SetUp() {
    // Configure the layout engine used by the graph DSL helpers.
    EG_NS::GraphLayout::GetInstance().Config(executor, nullptr);
    ge_env.InstallDefault()
        .Install(FakeCompoundEngine("FFTS+", {"AIcoreEngine", "DNN_VM_AICPU"}).KernelInfoStore("FFTS+"))
        .Install(FakeOp(GETNEXT).InfoStoreAndBuilder("AicpuLib"))
        .Install(FakeOp(HCOMREDUCE).InfoStoreAndBuilder("HcclLib"));
  }
  void TearDown() {}
};
/* | |||
* g1 | |||
* | |||
* ┌──────────┐ (0,1) ┌────────┐ (0,0) ┌────────┐ | |||
* │ const │ ───────> │ less │ ───────> │ reduce │ | |||
* └──────────┘ └────────┘ └────────┘ | |||
* ∧ | |||
* │ (0,0) | |||
* │ | |||
* ┌──────────┐ (0,0) ┌────────┐ (0,1) ┌────────┐ | |||
* │ get_next │ ───────> │ add │ <─────── │ data1 │ | |||
* └──────────┘ └────────┘ └────────┘ | |||
* | |||
*/ | |||
// Builds graph g1 (diagram above), compiles it through a Session, then checks
// that the FFTS+ optimizer collapsed the compute nodes into a single
// PartitionedCall subgraph whose nodes all carry a thread scope id and share
// the parent node's stream id.
TEST_F(TestFftsPlus, test_ffts_plus) {
  auto tensor = std::make_shared<GeTensor>();
  uint32_t value = 0;
  // Named cast instead of the original C-style cast.
  tensor->SetData(reinterpret_cast<uint8_t *>(&value), sizeof(uint32_t));

  DEF_GRAPH(g1) {
    CHAIN(NODE("get_next", GETNEXT)->NODE("add", ADD));
    CHAIN(NODE("data1", DATA)->NODE("add")->NODE("less", LESS)->NODE("reduce", HCOMREDUCE));
    // Fix: use the NODE macro here — the original wrote ->Node("less"),
    // inconsistent with every other link in these chains.
    CHAIN(NODE("const", OP_CFG(CONSTANTOP).Attr("value", tensor))->NODE("less"));
  };
  auto graph = ToGeGraph(g1);

  // new session & add graph
  map<AscendString, AscendString> options;
  Session session(options);
  auto ret = session.AddGraph(1, graph, options);
  EXPECT_EQ(ret, SUCCESS);
  // build input tensor
  std::vector<InputTensorInfo> inputs;
  // build_graph through session
  ret = session.BuildGraph(1, inputs);
  EXPECT_EQ(ret, SUCCESS);

  CHECK_GRAPH(PreRunAfterBuild) {
    // original compute nodes are gone; a PartitionedCall remains
    ASSERT_FALSE(IfNodeExist(graph, [](const NodePtr &node) { return node->GetName() == "get_next"; }));
    ASSERT_FALSE(IfNodeExist(graph, [](const NodePtr &node) { return node->GetName() == "add"; }));
    ASSERT_FALSE(IfNodeExist(graph, [](const NodePtr &node) { return node->GetName() == "less"; }));
    ASSERT_TRUE(IfNodeExist(graph, [](const NodePtr &node) { return node->GetType() == PARTITIONEDCALL; }));
    // exactly one subgraph exists, flagged as the FFTS+ subgraph
    ASSERT_EQ(graph->GetAllSubgraphs().size(), 1);
    std::vector<ComputeGraphPtr> subgraphs;
    GetSubgraphsWithFilter(graph,
                           [](const ComputeGraphPtr &graph) {
                             const auto &parent_node = graph->GetParentNode();
                             if ((parent_node == nullptr) || (parent_node->GetOpDesc() == nullptr)) {
                               return false;
                             }
                             return parent_node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_PLUS_SUB_GRAPH);
                           },
                           subgraphs);
    ASSERT_EQ(subgraphs.size(), 1);
    // every subgraph node is tagged with a thread scope id
    const auto &subgraph = subgraphs[0];
    ASSERT_TRUE(subgraph != nullptr);
    ASSERT_TRUE(IsAllNodeMatch(subgraph,
                               [](const NodePtr &node) {
                                 return node->GetOpDesc()->HasAttr(ATTR_NAME_THREAD_SCOPE_ID);
                               }));
    // and shares the parent node's stream id
    const auto &parent_node = subgraph->GetParentNode();
    ASSERT_TRUE(parent_node != nullptr);
    ASSERT_TRUE(parent_node->GetOpDesc() != nullptr);
    int64_t stream_id = parent_node->GetOpDesc()->GetStreamId();
    ASSERT_TRUE(IsAllNodeMatch(subgraph,
                               [stream_id](const NodePtr &node) {
                                 return node->GetOpDesc()->GetStreamId() == stream_id;
                               }));
  };
}
@@ -19,6 +19,11 @@ | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "framework/common/types.h" | |||
#include "ge_running_env/ge_running_env_faker.h" | |||
#include "easy_graph/layout/graph_layout.h" | |||
#include "easy_graph/layout/engines/graph_easy/graph_easy_option.h" | |||
#include "easy_graph/layout/engines/graph_easy/graph_easy_executor.h" | |||
#include "ge_graph_dsl/graph_dsl.h" | |||
#include "ge_graph_dsl/assert/graph_assert.h" | |||
@@ -94,9 +99,13 @@ Graph BuildV1ControlFlowGraph() { | |||
} | |||
} // namespace | |||
class FrameworkTest : public testing::Test { | |||
EG_NS::GraphEasyExecutor executor; | |||
protected: | |||
GeRunningEnvFaker ge_env; | |||
void SetUp() { ge_env.InstallDefault(); } | |||
void SetUp() { | |||
ge_env.InstallDefault(); | |||
EG_NS::GraphLayout::GetInstance().Config(executor, nullptr); | |||
} | |||
void TearDown() {} | |||
}; | |||
@@ -21,11 +21,21 @@ | |||
#include "framework/common/types.h" | |||
#include "graph/ge_local_context.h" | |||
#include "ge_graph_dsl/graph_dsl.h" | |||
#include "ge_running_env/ge_running_env_faker.h" | |||
#include "easy_graph/layout/graph_layout.h" | |||
#include "easy_graph/layout/engines/graph_easy/graph_easy_option.h" | |||
#include "easy_graph/layout/engines/graph_easy/graph_easy_executor.h" | |||
namespace ge { | |||
class STEST_opt_info : public testing::Test { | |||
protected: | |||
void SetUp() {} | |||
GeRunningEnvFaker ge_env; | |||
EG_NS::GraphEasyExecutor executor; | |||
void SetUp() { | |||
EG_NS::GraphLayout::GetInstance().Config(executor, nullptr); | |||
ge_env.InstallDefault(); | |||
} | |||
void TearDown() {} | |||
}; | |||
@@ -670,6 +670,7 @@ set(MULTI_PARTS_TEST_FILES | |||
"graph/build/stream_allocator_unittest.cc" | |||
"graph/build/model_builder_unittest.cc" | |||
"graph/build/mem_assigner_unittest.cc" | |||
"graph/build/graph_mem_assigner_unittest.cc" | |||
"graph/build/task_generator_unittest.cc" | |||
"graph/build/buffer_pool_mem_assigner_unittest.cc" | |||
"graph/execute/graph_execute_unittest.cc" | |||
@@ -935,6 +936,7 @@ target_link_libraries(ge_single_op PRIVATE | |||
ascend_protobuf | |||
json | |||
c_sec | |||
runtime_stub | |||
) | |||
# ut binary | |||
@@ -0,0 +1,90 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include <gtest/gtest.h> | |||
#include <memory> | |||
#include "graph/anchor.h" | |||
#include "graph/attr_value.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/utils/node_utils.h" | |||
#include "graph/utils/op_desc_utils.h" | |||
#include "graph/utils/tensor_utils.h" | |||
#include "omg/omg_inner_types.h" | |||
#include "../passes/graph_builder_utils.h" | |||
#define protected public | |||
#define private public | |||
#include "graph/build/memory/binary_block_mem_assigner.h" | |||
#include "graph/build/memory/graph_mem_assigner.h" | |||
#include "graph/build/memory/hybrid_mem_assigner.h" | |||
#include "graph/build/memory/max_block_mem_assigner.h" | |||
#include "graph/manager/graph_var_manager.h" | |||
#include "graph/manager/graph_mem_manager.h" | |||
#undef protected | |||
#undef private | |||
using namespace std; | |||
using namespace testing; | |||
using namespace ge; | |||
using domi::GetContext; | |||
class UtestGraphMemAssigner : public testing::Test { | |||
public: | |||
ge::ComputeGraphPtr BuildGraphWithVar(int64_t session_id) { | |||
// init | |||
MemManager::Instance().Initialize(std::vector<rtMemType_t>({RT_MEMORY_HBM})); | |||
VarManager::Instance(session_id)->Init(0, 0, 0, 0); | |||
ge::ut::GraphBuilder builder("graph"); | |||
auto var_input = builder.AddNode("var", "Variable", 1, 1); | |||
auto const_input = builder.AddNode("const", "Const", 1, 1); | |||
auto assign = builder.AddNode("assgin", "Assign", 2, 1); | |||
// add link | |||
builder.AddDataEdge(var_input, 0, assign, 0); | |||
builder.AddDataEdge(const_input, 0, assign, 1); | |||
// set offset | |||
var_input->GetOpDesc()->SetOutputOffset({10000}); | |||
const_input->GetOpDesc()->SetOutputOffset({1000}); | |||
assign->GetOpDesc()->SetInputOffset({10100, 1000}); | |||
assign->GetOpDesc()->SetOutputOffset({10100}); | |||
// set inner offset | |||
int64_t inner_offset = 100; | |||
ge::AttrUtils::SetInt(assign->GetOpDesc()->MutableInputDesc(0), ATTR_NAME_INNER_OFFSET, inner_offset); | |||
ge::AttrUtils::SetInt(assign->GetOpDesc()->MutableOutputDesc(0), ATTR_NAME_INNER_OFFSET, inner_offset); | |||
// add var addr | |||
VarManager::Instance(session_id)->var_resource_->var_offset_map_.emplace(10000, RT_MEMORY_HBM); | |||
return builder.GetGraph(); | |||
} | |||
protected: | |||
void SetUp() {} | |||
void TearDown() {} | |||
}; | |||
TEST_F(UtestGraphMemAssigner, graph_memory_assign_fail_case) { | |||
ge::ComputeGraphPtr compute_graph = make_shared<ge::ComputeGraph>(""); | |||
GraphMemoryAssigner graph_mem_assigner(compute_graph); | |||
MemoryOffset mem_offset(2, 10000); | |||
graph_mem_assigner.memory_offset_.insert({2, mem_offset}); | |||
VarManager::Instance(0)->graph_mem_max_size_ = 0; | |||
map<uint64_t, size_t> mem_type_to_offset = {}; | |||
Status ret = graph_mem_assigner.ReAssignMemory(false, mem_type_to_offset); | |||
EXPECT_EQ(ret, ACL_ERROR_GE_MEMORY_ALLOCATION); | |||
} | |||
@@ -23,15 +23,20 @@ | |||
#include "graph/load/model_manager/task_info/kernel_ex_task_info.h" | |||
#include "cce/aicpu_engine_struct.h" | |||
#include "tests/depends/runtime/src/runtime_stub.h" | |||
namespace ge { | |||
extern OpDescPtr CreateOpDesc(string name, string type); | |||
class UtestKernelExTaskInfo : public testing::Test { | |||
protected: | |||
void SetUp() {} | |||
void SetUp() { | |||
RTS_STUB_SETUP(); | |||
} | |||
void TearDown() {} | |||
void TearDown() { | |||
RTS_STUB_TEARDOWN(); | |||
} | |||
}; | |||
// test kernel_ex_task_Release | |||
@@ -209,4 +214,136 @@ TEST_F(UtestKernelExTaskInfo, parse_topic_type_failed_2) { | |||
KernelExTaskInfo kernel_ex_task_info; | |||
EXPECT_NE(kernel_ex_task_info.InitTaskExtInfo(ext_info, op_desc), SUCCESS); | |||
} | |||
TEST_F(UtestKernelExTaskInfo, blocking_aicpu_op) { | |||
int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||
vector<char> aicpu_ext_info(len, 0); | |||
char *buf = aicpu_ext_info.data(); | |||
int offset = 0; | |||
hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||
ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||
ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||
offset += sizeof(hybrid::AicpuExtInfo); | |||
hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||
async_wait_info->waitType = 0; | |||
async_wait_info->waitId = 0; | |||
async_wait_info->timeOut = 0; | |||
async_wait_info->reserved = 0; | |||
domi::TaskDef task_def; | |||
domi::KernelExDef kernel_ex_def; | |||
kernel_ex_def.set_kernel_ext_info(buf, len); | |||
kernel_ex_def.set_kernel_ext_info_size(len); | |||
domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); | |||
*kernel_ex_def_tmp = kernel_ex_def; | |||
const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||
ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||
KernelExTaskInfo kernel_ex_task_info; | |||
kernel_ex_task_info.op_desc_ = op_desc; | |||
DavinciModel davinci_model(0, nullptr); | |||
kernel_ex_task_info.davinci_model_ = &davinci_model; | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||
EXPECT_EQ(kernel_ex_task_info.Distribute(), SUCCESS); | |||
kernel_ex_task_info.op_desc_ = op_desc; | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||
EXPECT_EQ(kernel_ex_task_info.Distribute(), SUCCESS); | |||
} | |||
TEST_F(UtestKernelExTaskInfo, blocking_aicpu_op_fail_01) { | |||
int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||
vector<char> aicpu_ext_info(len, 0); | |||
char *buf = aicpu_ext_info.data(); | |||
int offset = 0; | |||
hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||
ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||
ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||
offset += sizeof(hybrid::AicpuExtInfo); | |||
hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||
async_wait_info->waitType = 0; | |||
async_wait_info->waitId = 0; | |||
async_wait_info->timeOut = 0; | |||
async_wait_info->reserved = 0; | |||
domi::TaskDef task_def; | |||
domi::KernelExDef kernel_ex_def; | |||
kernel_ex_def.set_kernel_ext_info(buf, len); | |||
kernel_ex_def.set_kernel_ext_info_size(len); | |||
domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); | |||
*kernel_ex_def_tmp = kernel_ex_def; | |||
const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||
KernelExTaskInfo kernel_ex_task_info; | |||
kernel_ex_task_info.op_desc_ = op_desc; | |||
DavinciModel davinci_model(0, nullptr); | |||
kernel_ex_task_info.davinci_model_ = &davinci_model; | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||
kernel_ex_task_info.is_blocking_aicpu_op_ = true; | |||
EXPECT_EQ(kernel_ex_task_info.Distribute(), FAILED); | |||
} | |||
TEST_F(UtestKernelExTaskInfo, blocking_aicpu_op_fail_02) { | |||
int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||
vector<char> aicpu_ext_info(len, 0); | |||
char *buf = aicpu_ext_info.data(); | |||
int offset = 0; | |||
hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||
ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||
ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||
offset += sizeof(hybrid::AicpuExtInfo); | |||
hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||
async_wait_info->waitType = 0; | |||
async_wait_info->waitId = 0; | |||
async_wait_info->timeOut = 0; | |||
async_wait_info->reserved = 0; | |||
domi::TaskDef task_def; | |||
domi::KernelExDef kernel_ex_def; | |||
kernel_ex_def.set_kernel_ext_info(buf, len); | |||
kernel_ex_def.set_kernel_ext_info_size(len); | |||
domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); | |||
*kernel_ex_def_tmp = kernel_ex_def; | |||
const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||
ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||
KernelExTaskInfo kernel_ex_task_info; | |||
kernel_ex_task_info.op_desc_ = op_desc; | |||
DavinciModel davinci_model(0, nullptr); | |||
kernel_ex_task_info.davinci_model_ = &davinci_model; | |||
RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_ex_task_info.Distribute(), FAILED); | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_ex_task_info.Distribute(), FAILED); | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_ex_task_info.Distribute(), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||
EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(kernel_ex_def.kernel_ext_info(), op_desc), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||
EXPECT_EQ(kernel_ex_task_info.Distribute(), SUCCESS); | |||
} | |||
} // namespace ge |
@@ -22,15 +22,20 @@ | |||
#include "graph/load/model_manager/davinci_model.h" | |||
#include "graph/load/model_manager/task_info/kernel_task_info.h" | |||
#include "graph/load/model_manager/task_info/hccl_task_info.h" | |||
#include "tests/depends/runtime/src/runtime_stub.h" | |||
namespace ge { | |||
extern OpDescPtr CreateOpDesc(string name, string type); | |||
class UtestKernelTaskInfo : public testing::Test { | |||
protected: | |||
void SetUp() {} | |||
void SetUp() { | |||
RTS_STUB_SETUP(); | |||
} | |||
void TearDown() {} | |||
void TearDown() { | |||
RTS_STUB_TEARDOWN(); | |||
} | |||
}; | |||
// test KernelTaskInfo Init. | |||
@@ -1240,4 +1245,135 @@ TEST_F(UtestKernelTaskInfo, kernel_task_info_super_kernel_info) { | |||
EXPECT_EQ(kernel_task_info.SKTFinalize(), SUCCESS); | |||
} | |||
TEST_F(UtestKernelTaskInfo, blocking_aicpu_op) { | |||
int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||
vector<char> aicpu_ext_info(len, 0); | |||
char *buf = aicpu_ext_info.data(); | |||
int offset = 0; | |||
hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||
ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||
ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||
offset += sizeof(hybrid::AicpuExtInfo); | |||
hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||
async_wait_info->waitType = 0; | |||
async_wait_info->waitId = 0; | |||
async_wait_info->timeOut = 0; | |||
async_wait_info->reserved = 0; | |||
domi::TaskDef task_def; | |||
domi::KernelDef kernel_def; | |||
kernel_def.set_kernel_ext_info(buf, len); | |||
kernel_def.set_kernel_ext_info_size(len); | |||
const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||
op_desc->SetId(0); | |||
ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||
DavinciModel davinci_model(0, nullptr); | |||
davinci_model.op_list_.emplace(0, op_desc); | |||
KernelTaskInfo kernel_task_info; | |||
kernel_task_info.op_desc_ = op_desc; | |||
kernel_task_info.davinci_model_ = &davinci_model; | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||
EXPECT_EQ(kernel_task_info.Distribute(), SUCCESS); | |||
kernel_task_info.op_desc_ = op_desc; | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||
EXPECT_EQ(kernel_task_info.Distribute(), SUCCESS); | |||
} | |||
TEST_F(UtestKernelTaskInfo, blocking_aicpu_op_fail_01) { | |||
int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||
vector<char> aicpu_ext_info(len, 0); | |||
char *buf = aicpu_ext_info.data(); | |||
int offset = 0; | |||
hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||
ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||
ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||
offset += sizeof(hybrid::AicpuExtInfo); | |||
hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||
async_wait_info->waitType = 0; | |||
async_wait_info->waitId = 0; | |||
async_wait_info->timeOut = 0; | |||
async_wait_info->reserved = 0; | |||
domi::KernelDef kernel_def; | |||
kernel_def.set_kernel_ext_info(buf, len); | |||
kernel_def.set_kernel_ext_info_size(len); | |||
const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||
op_desc->SetId(0); | |||
DavinciModel davinci_model(0, nullptr); | |||
davinci_model.op_list_.emplace(0, op_desc); | |||
KernelTaskInfo kernel_task_info; | |||
kernel_task_info.davinci_model_ = &davinci_model; | |||
kernel_task_info.op_desc_ = op_desc; | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||
kernel_task_info.is_blocking_aicpu_op_ = true; | |||
EXPECT_EQ(kernel_task_info.Distribute(), FAILED); | |||
} | |||
TEST_F(UtestKernelTaskInfo, blocking_aicpu_op_fail_02) { | |||
int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||
vector<char> aicpu_ext_info(len, 0); | |||
char *buf = aicpu_ext_info.data(); | |||
int offset = 0; | |||
hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||
ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||
ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||
offset += sizeof(hybrid::AicpuExtInfo); | |||
hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||
async_wait_info->waitType = 0; | |||
async_wait_info->waitId = 0; | |||
async_wait_info->timeOut = 0; | |||
async_wait_info->reserved = 0; | |||
domi::KernelDef kernel_def; | |||
kernel_def.set_kernel_ext_info(buf, len); | |||
kernel_def.set_kernel_ext_info_size(len); | |||
const OpDescPtr op_desc = CreateOpDesc("deque", "Deque"); | |||
ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true); | |||
op_desc->SetId(0); | |||
DavinciModel davinci_model(0, nullptr); | |||
davinci_model.op_list_.emplace(0, op_desc); | |||
KernelTaskInfo kernel_task_info; | |||
kernel_task_info.davinci_model_ = &davinci_model; | |||
kernel_task_info.op_desc_ = op_desc; | |||
RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_task_info.Distribute(), FAILED); | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_task_info.Distribute(), FAILED); | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); | |||
EXPECT_EQ(kernel_task_info.Distribute(), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||
EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(kernel_def.kernel_ext_info()), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||
EXPECT_EQ(kernel_task_info.Distribute(), SUCCESS); | |||
} | |||
} // namespace ge |
@@ -131,7 +131,7 @@ TEST_F(UtestGraphOptimizeTest, test_OptimizeAfterStage1_succ) { | |||
shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
EXPECT_NE(instance_ptr, nullptr); | |||
GraphOptimizerPtr graph_opt = MakeShared<TestGraphOptimizerSuccess>(); | |||
instance_ptr->opsManager_.graph_optimizers_by_priority_.push_back(make_pair("AIcoreEngine", graph_opt)); | |||
instance_ptr->opsManager_.atomic_first_optimizers_by_priority_.push_back(make_pair("AIcoreEngine", graph_opt)); | |||
ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>("test_graph"); | |||
GraphOptimize base_optimize; | |||
@@ -167,7 +167,7 @@ TEST_F(UtestGraphOptimizeTest, test_OptimizeAfterStage1_fail) { | |||
shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
EXPECT_NE(instance_ptr, nullptr); | |||
GraphOptimizerPtr graph_opt = MakeShared<TestGraphOptimizerFail>(); | |||
instance_ptr->opsManager_.graph_optimizers_by_priority_.push_back(make_pair("AIcoreEngine", graph_opt)); | |||
instance_ptr->opsManager_.atomic_first_optimizers_by_priority_.push_back(make_pair("AIcoreEngine", graph_opt)); | |||
ret = base_optimize.OptimizeAfterStage1(compute_graph); | |||
EXPECT_EQ(ret, FAILED); | |||
@@ -183,7 +183,7 @@ TEST_F(UtestGraphOptimizeTest, test_optimizers_succ) { | |||
shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
EXPECT_NE(instance_ptr, nullptr); | |||
GraphOptimizerPtr graph_opt = MakeShared<TestGraphOptimizerSuccess>(); | |||
instance_ptr->opsManager_.graph_optimizers_by_priority_.push_back(make_pair("AIcoreEngine", graph_opt)); | |||
instance_ptr->opsManager_.atomic_first_optimizers_by_priority_.push_back(make_pair("AIcoreEngine", graph_opt)); | |||
ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>("test_graph"); | |||
GraphOptimize base_optimize; | |||
@@ -197,7 +197,7 @@ TEST_F(UtestGraphOptimizeTest, test_optimizers_succ) { | |||
ret = base_optimize.OptimizeOriginalGraphForQuantize(compute_graph); | |||
EXPECT_EQ(ret, SUCCESS); | |||
ret = base_optimize.OptimizeGraphBeforeBuildForRts(compute_graph); | |||
ret = base_optimize.OptimizeGraphBeforeBuild(compute_graph); | |||
EXPECT_EQ(ret, SUCCESS); | |||
ret = base_optimize.OptimizeWholeGraph(compute_graph); | |||
@@ -215,7 +215,7 @@ TEST_F(UtestGraphOptimizeTest, test_optimizers_fail) { | |||
shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
EXPECT_NE(instance_ptr, nullptr); | |||
GraphOptimizerPtr graph_opt = MakeShared<TestGraphOptimizerFail>(); | |||
instance_ptr->opsManager_.graph_optimizers_by_priority_.push_back(make_pair("AIcoreEngine", graph_opt)); | |||
instance_ptr->opsManager_.atomic_first_optimizers_by_priority_.push_back(make_pair("AIcoreEngine", graph_opt)); | |||
ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>("test_graph"); | |||
GraphOptimize base_optimize; | |||
@@ -229,7 +229,7 @@ TEST_F(UtestGraphOptimizeTest, test_optimizers_fail) { | |||
ret = base_optimize.OptimizeOriginalGraphForQuantize(compute_graph); | |||
EXPECT_EQ(ret, FAILED); | |||
ret = base_optimize.OptimizeGraphBeforeBuildForRts(compute_graph); | |||
ret = base_optimize.OptimizeGraphBeforeBuild(compute_graph); | |||
EXPECT_EQ(ret, FAILED); | |||
ret = base_optimize.OptimizeWholeGraph(compute_graph); | |||
@@ -367,7 +367,7 @@ TEST(UtestIrBuild, check_data_op_attr_index_valid) { | |||
}; | |||
ModelBufferData model; | |||
graphStatus ret = aclgrphBuildModel(graph, build_options, model); | |||
EXPECT_EQ(ret, GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); | |||
EXPECT_EQ(ret, ge::FAILED); | |||
} | |||
// set attr index invalid, when not set input shape range | |||
@@ -377,7 +377,7 @@ TEST(UtestIrBuild, check_data_attr_index_succ_no_input_range) { | |||
const map<string, string> build_options; | |||
ModelBufferData model; | |||
graphStatus ret = aclgrphBuildModel(graph, build_options, model); | |||
EXPECT_EQ(ret, GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); | |||
EXPECT_EQ(ret, ge::FAILED); | |||
} | |||
TEST(UtestIrBuild, check_modify_mixlist_param) { | |||
@@ -27,7 +27,7 @@ | |||
#include "hybrid/node_executor/aicpu/aicpu_node_executor.h" | |||
#undef protected | |||
#undef private | |||
#include "tests/depends/runtime/src/runtime_stub.h" | |||
using namespace std; | |||
using namespace testing; | |||
@@ -43,8 +43,12 @@ using namespace hybrid; | |||
class UtestAicpuNodeExecutor : public testing::Test { | |||
protected: | |||
void SetUp() {} | |||
void TearDown() {} | |||
void SetUp() { | |||
RTS_STUB_SETUP(); | |||
} | |||
void TearDown() { | |||
RTS_STUB_TEARDOWN(); | |||
} | |||
}; | |||
static NodePtr CreateNode(ComputeGraphPtr graph, const string &name, const string &type, int in_num, int out_num) { | |||
@@ -164,5 +168,222 @@ TEST_F(UtestAicpuNodeExecutor, aicpu_tf_node_task) { | |||
} | |||
TEST_F(UtestAicpuNodeExecutor, aicpu_blocking_node_task) { | |||
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test"); | |||
GeRootModelPtr ge_root_model = std::make_shared<GeRootModel>(graph); | |||
ge_root_model->SetModelName("test_name"); | |||
HybridModel hybrid_model(ge_root_model); | |||
NodePtr node = CreateNode(graph, "deque", FRAMEWORK_OP_TYPE, 1, 1); | |||
ge::AttrUtils::SetBool(node->GetOpDesc(), ATTR_NAME_IS_BLOCKING_OP, true); | |||
std::unique_ptr<NodeItem> new_node; | |||
ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); | |||
NodeItem *node_item = new_node.get(); | |||
node_item->input_start = 0; | |||
node_item->output_start = 0; | |||
node_item->is_dynamic = true; | |||
node_item->shape_inference_type = DEPEND_SHAPE_RANGE; | |||
GraphItem graph_item; | |||
graph_item.node_items_.emplace_back(node_item); | |||
graph_item.total_inputs_ = 1; | |||
graph_item.total_outputs_ = 1; | |||
GraphExecutionContext graph_execution_context; | |||
SubgraphContext subgraph_context(&graph_item, &graph_execution_context); | |||
ASSERT_EQ(subgraph_context.Init(), SUCCESS); | |||
graph_execution_context.callback_manager = std::unique_ptr<CallbackManager>(new CallbackManager()); | |||
auto node_state = subgraph_context.GetOrCreateNodeState(node_item); | |||
ASSERT_NE(node_state, nullptr); | |||
uint64_t value_0 = 512; | |||
TensorValue in_tensor0(&value_0, sizeof(value_0)); | |||
subgraph_context.SetInput(*node_item, 0, in_tensor0); | |||
TensorValue out_tensor0(&value_0, sizeof(value_0)); | |||
subgraph_context.SetOutput(*node_item, 0, out_tensor0); | |||
int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||
vector<char> aicpu_ext_info(len, 0); | |||
char *buf = aicpu_ext_info.data(); | |||
int offset = 0; | |||
hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||
ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||
ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||
offset += sizeof(hybrid::AicpuExtInfo); | |||
hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||
async_wait_info->waitType = 0; | |||
async_wait_info->waitId = 0; | |||
async_wait_info->timeOut = 0; | |||
async_wait_info->reserved = 0; | |||
domi::KernelDef kernel_def; | |||
kernel_def.set_kernel_ext_info(buf, len); | |||
kernel_def.set_kernel_ext_info_size(len); | |||
domi::TaskDef task_def; | |||
AicpuTaskStruct args; | |||
args.head.length = sizeof(args); | |||
args.head.ioAddrNum = 2; | |||
kernel_def.set_args(reinterpret_cast<const char *>(&args), args.head.length); | |||
kernel_def.set_args_size(args.head.length); | |||
domi::KernelDef *kernel_def_tmp = task_def.mutable_kernel(); | |||
*kernel_def_tmp = kernel_def; | |||
AicpuNodeTask aicpu_node_task(node_item, task_def); | |||
ASSERT_EQ(aicpu_node_task.Init(hybrid_model), SUCCESS); | |||
ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); | |||
node_item->shape_inference_type = DEPEND_COMPUTE; | |||
domi::KernelExDef kernel_ex_def; | |||
kernel_ex_def.set_kernel_ext_info(buf, len); | |||
kernel_ex_def.set_kernel_ext_info_size(len); | |||
kernel_ex_def.set_args(reinterpret_cast<const char *>(&args), args.head.length); | |||
kernel_ex_def.set_args_size(args.head.length); | |||
domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); | |||
*kernel_ex_def_tmp = kernel_ex_def; | |||
hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def, task_def}); | |||
AicpuTfNodeTask aicpu_tf_node_task(node_item, task_def); | |||
ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); | |||
ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); | |||
} | |||
TEST_F(UtestAicpuNodeExecutor, aicpu_blocking_node_task_fail) { | |||
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test"); | |||
GeRootModelPtr ge_root_model = std::make_shared<GeRootModel>(graph); | |||
ge_root_model->SetModelName("test_name"); | |||
HybridModel hybrid_model(ge_root_model); | |||
NodePtr node = CreateNode(graph, "deque", FRAMEWORK_OP_TYPE, 1, 1); | |||
ge::AttrUtils::SetBool(node->GetOpDesc(), ATTR_NAME_IS_BLOCKING_OP, true); | |||
std::unique_ptr<NodeItem> new_node; | |||
ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); | |||
NodeItem *node_item = new_node.get(); | |||
node_item->input_start = 0; | |||
node_item->output_start = 0; | |||
node_item->is_dynamic = true; | |||
node_item->shape_inference_type = DEPEND_SHAPE_RANGE; | |||
GraphItem graph_item; | |||
graph_item.node_items_.emplace_back(node_item); | |||
graph_item.total_inputs_ = 1; | |||
graph_item.total_outputs_ = 1; | |||
GraphExecutionContext graph_execution_context; | |||
SubgraphContext subgraph_context(&graph_item, &graph_execution_context); | |||
ASSERT_EQ(subgraph_context.Init(), SUCCESS); | |||
graph_execution_context.callback_manager = std::unique_ptr<CallbackManager>(new CallbackManager()); | |||
auto node_state = subgraph_context.GetOrCreateNodeState(node_item); | |||
ASSERT_NE(node_state, nullptr); | |||
uint64_t value_0 = 512; | |||
TensorValue in_tensor0(&value_0, sizeof(value_0)); | |||
subgraph_context.SetInput(*node_item, 0, in_tensor0); | |||
TensorValue out_tensor0(&value_0, sizeof(value_0)); | |||
subgraph_context.SetOutput(*node_item, 0, out_tensor0); | |||
int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo); | |||
vector<char> aicpu_ext_info(len, 0); | |||
char *buf = aicpu_ext_info.data(); | |||
int offset = 0; | |||
hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset); | |||
ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT; | |||
ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo); | |||
offset += sizeof(hybrid::AicpuExtInfo); | |||
hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset); | |||
async_wait_info->waitType = 0; | |||
async_wait_info->waitId = 0; | |||
async_wait_info->timeOut = 0; | |||
async_wait_info->reserved = 0; | |||
domi::KernelDef kernel_def; | |||
kernel_def.set_kernel_ext_info(buf, len); | |||
kernel_def.set_kernel_ext_info_size(len); | |||
domi::TaskDef task_def; | |||
AicpuTaskStruct args; | |||
args.head.length = sizeof(args); | |||
args.head.ioAddrNum = 2; | |||
kernel_def.set_args(reinterpret_cast<const char *>(&args), args.head.length); | |||
kernel_def.set_args_size(args.head.length); | |||
domi::KernelDef *kernel_def_tmp = task_def.mutable_kernel(); | |||
*kernel_def_tmp = kernel_def; | |||
AicpuNodeTask aicpu_node_task(node_item, task_def); | |||
RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); | |||
ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); | |||
ASSERT_EQ(aicpu_node_task.Init(hybrid_model), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); | |||
ASSERT_EQ(aicpu_node_task.Init(hybrid_model), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||
ASSERT_EQ(aicpu_node_task.Init(hybrid_model), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||
ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); | |||
node_item->shape_inference_type = DEPEND_COMPUTE; | |||
domi::KernelExDef kernel_ex_def; | |||
kernel_ex_def.set_kernel_ext_info(buf, len); | |||
kernel_ex_def.set_kernel_ext_info_size(len); | |||
kernel_ex_def.set_args(reinterpret_cast<const char *>(&args), args.head.length); | |||
kernel_ex_def.set_args_size(args.head.length); | |||
domi::KernelExDef *kernel_ex_def_tmp = task_def.mutable_kernel_ex(); | |||
*kernel_ex_def_tmp = kernel_ex_def; | |||
hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def, task_def}); | |||
AicpuTfNodeTask aicpu_tf_node_task(node_item, task_def); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1); | |||
ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), FAILED); | |||
ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); | |||
ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001); | |||
ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), FAILED); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||
EXPECT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); | |||
RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE); | |||
RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT); | |||
EXPECT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); | |||
} | |||
} // namespace ge | |||
@@ -19,6 +19,7 @@ | |||
#include "graph/load/model_manager/model_utils.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | |||
#include "runtime/rt.h" | |||
#define protected public | |||
@@ -30,6 +31,7 @@ | |||
#include "external/register/op_tiling_registry.h" | |||
#undef private | |||
#undef protected | |||
#include "tests/depends/runtime/src/runtime_stub.h" | |||
using namespace std; | |||
using namespace testing; | |||
@@ -38,9 +40,13 @@ using namespace optiling; | |||
class UtestSingleOpTask : public testing::Test { | |||
protected: | |||
void SetUp() {} | |||
void SetUp() { | |||
RTS_STUB_SETUP(); | |||
} | |||
void TearDown() {} | |||
void TearDown() { | |||
RTS_STUB_TEARDOWN(); | |||
} | |||
}; | |||
TEST_F(UtestSingleOpTask, test_build_kernel_task) { | |||
@@ -237,3 +243,124 @@ TEST_F(UtestSingleOpTask, test_aicpu_task_update_io_addr) { | |||
ASSERT_EQ(ret, PARAM_INVALID); | |||
} | |||
} | |||
// Happy path for a blocking AICPU op on AiCpuCCTask: a kernel_ext_info buffer
// carrying one ASYNCWAIT record must parse and the kernel must launch.
TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_01) {
  // Build the ext-info buffer: one AicpuExtInfo header followed by an
  // AsyncWaitInfo payload, all zero-initialized.
  int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo);
  vector<char> aicpu_ext_info(len, 0);
  char *buf = aicpu_ext_info.data();
  int offset = 0;
  hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset);
  ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT;
  ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo);
  offset += sizeof(hybrid::AicpuExtInfo);
  hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset);
  async_wait_info->waitType = 0;
  async_wait_info->waitId = 0;
  async_wait_info->timeOut = 0;
  async_wait_info->reserved = 0;
  domi::KernelDef kernel_def;
  kernel_def.set_kernel_ext_info(buf, len);
  kernel_def.set_kernel_ext_info_size(len);
  auto op_desc = make_shared<OpDesc>("deque", "Deque");
  ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true);
  AiCpuCCTask aicpu_task;
  aicpu_task.SetOpDesc(op_desc);
  rtStream_t stream = nullptr;  // initialize: rtStreamCreate is asserted, not guaranteed
  ASSERT_EQ(rtStreamCreate(&stream, 0), RT_ERROR_NONE);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);
  // Release the stream so repeated runs do not leak runtime resources.
  ASSERT_EQ(rtStreamDestroy(stream), RT_ERROR_NONE);
}
// Same scenario as test_blocking_aicpu_op_01 but for the TF-kernel path
// (AiCpuTask instead of AiCpuCCTask).
TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_02) {
  // Ext-info buffer: AicpuExtInfo header + zeroed AsyncWaitInfo payload.
  int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo);
  vector<char> aicpu_ext_info(len, 0);
  char *buf = aicpu_ext_info.data();
  int offset = 0;
  hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset);
  ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT;
  ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo);
  offset += sizeof(hybrid::AicpuExtInfo);
  hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset);
  async_wait_info->waitType = 0;
  async_wait_info->waitId = 0;
  async_wait_info->timeOut = 0;
  async_wait_info->reserved = 0;
  domi::KernelDef kernel_def;
  kernel_def.set_kernel_ext_info(buf, len);
  kernel_def.set_kernel_ext_info_size(len);
  auto op_desc = make_shared<OpDesc>("deque", "Deque");
  ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true);
  AiCpuTask aicpu_task;
  aicpu_task.SetOpDesc(op_desc);
  rtStream_t stream = nullptr;  // initialize: rtStreamCreate is asserted, not guaranteed
  ASSERT_EQ(rtStreamCreate(&stream, 0), RT_ERROR_NONE);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);
  // Release the stream so repeated runs do not leak runtime resources.
  ASSERT_EQ(rtStreamDestroy(stream), RT_ERROR_NONE);
}
// Failure injection for the blocking-AICPU-op path: each RTS_STUB_* forces a
// single runtime-API failure, and the corresponding SetExtInfoAndType /
// LaunchKernel call must surface FAILED. Ends on the "blocking not supported"
// capability, which must downgrade gracefully to SUCCESS.
TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_fail) {
  // Ext-info buffer: AicpuExtInfo header + zeroed AsyncWaitInfo payload.
  int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo);
  vector<char> aicpu_ext_info(len, 0);
  char *buf = aicpu_ext_info.data();
  int offset = 0;
  hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo*>(buf + offset);
  ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT;
  ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo);
  offset += sizeof(hybrid::AicpuExtInfo);
  hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo*>(buf + offset);
  async_wait_info->waitType = 0;
  async_wait_info->waitId = 0;
  async_wait_info->timeOut = 0;
  async_wait_info->reserved = 0;
  domi::KernelDef kernel_def;
  kernel_def.set_kernel_ext_info(buf, len);
  kernel_def.set_kernel_ext_info_size(len);
  auto op_desc = make_shared<OpDesc>("deque", "Deque");
  ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true);
  AiCpuTask aicpu_task;
  aicpu_task.SetOpDesc(op_desc);
  rtStream_t stream = nullptr;  // initialize: rtStreamCreate is asserted, not guaranteed
  ASSERT_EQ(rtStreamCreate(&stream, 0), RT_ERROR_NONE);
  // Baseline: everything succeeds before any fault is injected.
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);
  // rtGetDevice failure during ext-info parsing.
  RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED);
  // rtGetDeviceCapability failure; injected twice because the stub is consumed
  // once per call and two distinct call sites are exercised (same pattern as
  // the hybrid-executor variant of this test).
  RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED);
  RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED);
  // Capability query succeeds but reports an out-of-range value.
  RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE);
  RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED);
  // rtGetDevice failure at launch time.
  RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), FAILED);
  // Event wait / event reset failures at launch time.
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), FAILED);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), FAILED);
  // Device reports "blocking op not supported": both calls must still succeed.
  RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE);
  RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT);
  EXPECT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE);
  RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT);
  EXPECT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);
  // Release the stream so repeated runs do not leak runtime resources.
  EXPECT_EQ(rtStreamDestroy(stream), RT_ERROR_NONE);
}
@@ -62,6 +62,7 @@ enum FWKTaskExtInfoType { | |||
FWK_ADPT_EXT_SESSION_INFO, | |||
FWK_ADPT_EXT_BITMAP, | |||
FWK_ADPT_EXT_TOPIC_TYPE, | |||
FWK_ADPT_EXT_ASYNCWAIT, | |||
FWK_ADPT_EXT_INVALID | |||
}; | |||
@@ -80,6 +81,12 @@ enum FWKExtUpdateAddrType { | |||
FWK_ADPT_UPDATE_INPUT_OUTPUT | |||
}; | |||
// Wait-type values carried in the ASYNCWAIT extended-info record.
enum FWKExtWaitType {
  FWK_ADPT_WAIT_TYPE_NULL = 0,    // no wait requested
  FWK_ADPT_WAIT_TYPE_EVENT = 1,   // wait on a runtime event
  FWK_ADPT_WAIT_TYPE_INVALID = 2  // sentinel: first invalid value
};
#pragma pack(push, 1) | |||
// API Parameter Structure | |||
struct StrFWKKernel { | |||
@@ -133,6 +140,15 @@ struct ResultSummary { | |||
uint64_t raw_data_size; // size of raw data | |||
}; | |||
#pragma pack(pop) | |||
#pragma pack(push, 1)
// Async-wait record appended to the kernel ext-info buffer; 1-byte packed
// because the layout is a wire format shared with the AICPU framework.
struct AsyncWait {
  uint8_t waitType;   // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait
  uint32_t waitId;    // wait id, GE refresh
  uint32_t timeOut;   // reserved
  uint64_t reserved;
};
#pragma pack(pop)
} // end namespace FWKAdapter | |||
} // namespace aicpu | |||
@@ -52,6 +52,14 @@ typedef enum tagRtAicpuScheType { | |||
SCHEDULE_HARDWARE, /* HWTS Schedule */ | |||
} rtAicpuScheType; | |||
// Capability values reported by rtGetDeviceCapability().
typedef enum tagRtDeviceCapabilityType {
  RT_SCHEDULE_SOFTWARE = 0,              // SoftWare Schedule
  RT_SCHEDULE_SOFTWARE_OPT = 1,          // optimized software schedule
  RT_SCHEDULE_HARDWARE = 2,              // HWTS Schedule
  RT_AICPU_BLOCKING_OP_NOT_SUPPORT = 3,  // ts cannot block on AICPU ops
  RT_AICPU_BLOCKING_OP_SUPPORT = 4,      // 1910/1980/1951 ts support AICPU blocking operation
} rtDeviceCapabilityType;
typedef enum tagRtVersion { | |||
VER_BEGIN = 0, | |||
VER_NA = VER_BEGIN, | |||
@@ -65,6 +65,7 @@ typedef enum tagRtFeatureType { | |||
typedef enum tagRtDeviceFeatureType { | |||
FEATURE_TYPE_SCHE, | |||
FEATURE_TYPE_BLOCKING_OPERATOR, | |||
FEATURE_TYPE_END, | |||
} rtDeviceFeatureType_t; | |||
@@ -78,6 +79,17 @@ typedef enum tagMemoryInfo { | |||
MEMORY_INFO_RSV | |||
} rtMemoryInfo_t; | |||
// Hardware module selector used when querying per-module device information.
// NOTE(review): the typedef name keeps the "tag" prefix (tagRtDeviceModuleType_t),
// unlike sibling enums; renaming would break existing callers, so it is kept.
typedef enum tagRtDeviceModuleType {
  RT_MODULE_TYPE_SYSTEM = 0,       // whole-system scope
  RT_MODULE_TYPE_AICPU = 1,
  RT_MODULE_TYPE_CCPU = 2,
  RT_MODULE_TYPE_DCPU = 3,
  RT_MODULE_TYPE_AICORE = 4,
  RT_MODULE_TYPE_TSCPU = 5,
  RT_MODULE_TYPE_PCIE = 6,
  RT_MODULE_TYPE_VECTOR_CORE = 7
} tagRtDeviceModuleType_t;
/** | |||
* @ingroup dvrt_dev | |||
* @brief get total device number. | |||
@@ -356,7 +356,7 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, | |||
rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo); | |||
rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo); | |||
/** | |||
* @ingroup rt_kernel | |||
@@ -652,4 +652,3 @@ RTS_API rtError_t rtStopMDCProfiler(void *addr); | |||
#endif | |||
#endif // __CCE_RUNTIME_KERNEL_H__ | |||
@@ -28,5 +28,7 @@ | |||
#include "rt_model.h" | |||
#include "stream.h" | |||
#include "rt_ffts.h" | |||
#include "rt_ffts_plus.h" | |||
#include "rt_ffts_plus_define.h" | |||
#endif // __CCE_RUNTIME_RT_H__ |
@@ -0,0 +1,33 @@ | |||
/*
 * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
 * Description: ffts plus interface
 */
#ifndef __CCE_RUNTIME_FFTS_PLUS_H
#define __CCE_RUNTIME_FFTS_PLUS_H
#include "base.h"
#include "rt_stars_define.h"
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif
#pragma pack(push)
#pragma pack (1)
// Launch descriptor for an FFTS+ task: the submission queue entry plus the
// buffer that holds every context of the FFTS+ subgraph.
typedef struct tagFftsPlusTaskInfo {
  const rtFftsPlusSqe_t *fftsPlusSqe;
  const void *descBuf;  // include total context
  size_t descBufLen;    // the length of descBuf
} rtFftsPlusTaskInfo_t;
#pragma pack(pop)
RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *handle, const void *devFunc, void **addr, uint32_t *prefetchCnt);
RTS_API rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream);
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif
#endif  // __CCE_RUNTIME_FFTS_PLUS_H
@@ -0,0 +1,682 @@ | |||
/* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||
* Description: the definition of ffts plus | |||
*/ | |||
#ifndef __CCE_RUNTIME_FFTS_PLUS_DEFINE_H | |||
#define __CCE_RUNTIME_FFTS_PLUS_DEFINE_H | |||
#include "base.h" | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
extern "C" { | |||
#endif | |||
#pragma pack(push) | |||
#pragma pack (1) | |||
// hardware context type
// Value stored in the hardwareContextType byte of every FFTS+ context.
// Note: value 2 is intentionally unassigned in this enumeration.
typedef enum tagFftsPlusHwType {
  RT_HW_CTX_TYPE_AIC = 0,
  RT_HW_CTX_TYPE_AIV = 1,
  RT_HW_CTX_TYPE_NOTIFY_WAIT = 3,
  RT_HW_CTX_TYPE_NOTIFY_RECORD = 4,
  RT_HW_CTX_TYPE_WRITE_VALUE = 5,
  RT_HW_CTX_TYPE_MIX_AIC = 6,
  RT_HW_CTX_TYPE_MIX_AIV = 7,
  RT_HW_CTX_TYPE_SDMA = 8,
  RT_HW_CTX_TYPE_FLUSH_DATA = 9,
  RT_HW_CTX_TYPE_INVALIDATE_DATA = 10,
  RT_HW_CTX_TYPE_WRITEBACK_DATA = 11,
  RT_HW_CTX_TYPE_AICPU = 12,
  RT_HW_CTX_TYPE_LOAD = 13,
  RT_HW_CTX_TYPE_MAX,
}rtFftsPlusHwType_t;
// software context type (fixed: previous comment wrongly said "hardware")
// Value stored in the softwareContextType byte of every FFTS+ context.
typedef enum tagFftsPlusSoftType {
  RT_SOFT_CTX_TYPE_COND_SWITCH = 1,
  RT_SOFT_CTX_TYPE_CASE_SWITCH = 2,
  RT_SOFT_CTX_TYPE_AT_START = 3,
  RT_SOFT_CTX_TYPE_AT_END = 4,
  RT_SOFT_CTX_TYPE_LABEL = 5,
  RT_SOFT_CTX_TYPE_MAX,
}rtFftsPlusSoftType_t;
// condition type
// Comparison operator used by the condition-switch context (cmpValue1 vs cmpValue2).
typedef enum tagFftsPlusCondType {
  RT_COND_TYPE_EQUAL = 0,
  RT_COND_TYPE_NOTEQUAL = 1,
  RT_COND_TYPE_GREATER = 2,
  RT_COND_TYPE_GREATER_OR_EQUAL = 3,
  RT_COND_TYPE_LESS = 4,
  RT_COND_TYPE_LESS_OR_EQUAL = 5,
  RT_COND_TYPE_MAX,
}rtFftsPlusCondType_t;
// the definition of ffts plus context | |||
#define RT_CTX_SUCCESSOR_NUM 26 | |||
// ffts plus common context | |||
typedef struct tagFftsPlusComCtx { | |||
// 0-3 bytes | |||
uint8_t hardwareContextType; | |||
uint8_t softwareContextType; | |||
uint8_t successorNum; | |||
uint8_t res1 : 7; | |||
uint8_t aten : 1; | |||
// 4-7 | |||
uint8_t res2; | |||
uint8_t res3; | |||
uint8_t predCntInit; | |||
uint8_t predCnt; | |||
// 8-11 | |||
uint32_t res4; | |||
// 12-63 | |||
uint16_t successorList[RT_CTX_SUCCESSOR_NUM]; | |||
// 64-71 | |||
uint32_t res5[2]; | |||
// 72-75 | |||
uint16_t threadId; | |||
uint16_t threadDim; | |||
// 76-127 | |||
uint32_t res6[13]; | |||
} rtFftsPlusComCtx_t; | |||
// aic/aiv context
// 128-byte FFTS+ context for a pure AI-core (AIC) or AI-vector (AIV) kernel;
// carries task-param pointer and non-tail/tail start PCs and block dims.
typedef struct tagFftsPlusAicAivCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t res2;
  uint8_t res3;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res4;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint16_t stat: 1;
  uint16_t schem: 2;
  uint16_t icachePrefetchCnt: 5;
  uint16_t res5: 7;
  uint16_t atm: 1;
  uint16_t prefetchEnableBitmap: 4;
  uint16_t res6: 4;
  uint16_t prefetchOnceBitmap: 4;
  uint16_t res7: 4;
  // 68-71
  uint32_t res8;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint16_t nonTailBlockdim;
  uint16_t tailBlockdim;
  // 80-83
  uint32_t taskParamPtrBaseL;
  // 84-87
  uint16_t taskParamPtrBaseH;
  uint16_t taskParamPtrOffset;
  // 88-95
  uint32_t res9;
  uint32_t res10;
  // 96-103
  uint32_t nonTailTaskStartPcL;
  uint16_t nonTailTaskStartPcH;
  uint16_t res11;
  // 104-111
  uint32_t tailTaskStartPcL;
  uint16_t tailTaskStartPcH;
  uint16_t res12;
  // 112-119
  uint32_t res13;
  uint32_t res14;
  // 120-127
  uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph)
} rtFftsPlusAicAivCtx_t;
// mix aic/aiv context
// 128-byte FFTS+ context for a mixed AIC+AIV kernel: separate task-param
// pointers and start PCs for the AIC and AIV parts, plus block ratios.
typedef struct tagFftsPlusMixAicAivCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t res2;
  uint8_t res3;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res4;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint16_t stat: 1;
  uint16_t schem: 2;
  uint16_t icachePrefetchCnt: 5;
  uint16_t res5: 7;
  uint16_t atm: 1;
  uint16_t prefetchEnableBitmap: 4;
  uint16_t res6: 4;
  uint16_t prefetchOnceBitmap: 4;
  uint16_t res7: 4;
  // 68-71
  uint16_t res8;
  uint8_t nonTailBlockRatioN;
  uint8_t tailBlockRatioN;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint16_t nonTailBlockdim;
  uint16_t tailBlockdim;
  // 80-87
  uint32_t aicTaskParamPtrL;
  uint16_t aicTaskParamPtrH;
  uint16_t aicTaskParamPtrOffset;
  // 88-95
  uint32_t aivTaskParamPtrL;
  uint16_t aivTaskParamPtrH;
  uint16_t aivTaskParamPtrOffset;
  // 96-103
  uint32_t nonTailAicTaskStartPcL;
  uint16_t nonTailAicTaskStartPcH;
  uint16_t tailAicTaskStartPcH;
  // 104-111
  uint32_t tailAicTaskStartPcL;
  uint32_t nonTailAivTaskStartPcL;
  // 112-119
  uint16_t nontailAivTaskStartPcH;
  uint16_t tailAivTaskStartPcH;
  uint32_t tailAivTaskStartPcL;
  // 120-127
  uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph)
} rtFftsPlusMixAicAivCtx_t;
// sdma context (fixed typo: was "adma")
// 128-byte FFTS+ context describing an SDMA copy: source/destination
// stream ids, base addresses and non-tail/tail transfer lengths.
typedef struct tagFftsPlusSdmaCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t res2;
  uint8_t res3;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res4;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint8_t sat: 1;
  uint8_t res5: 7;
  uint8_t res6: 7;
  uint8_t atm: 1;
  uint16_t res7;
  // 68-71
  uint32_t res8;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint32_t sdmaSqeHeader; // (FORMAT/MPAMNS/PARTID/DRO/SRO/QOS/DNS/SNS/DSSV/SSSV/IE/UPCODE)
  // 80-83
  uint16_t sourceStreamId;
  uint16_t sourceSubstreamId;
  // 84-87
  uint16_t destinationStreamId;
  uint16_t destinationSubstreamId;
  // 88-127
  uint32_t sourceAddressBaseL;
  uint32_t sourceAddressBaseH;
  uint32_t sourceAddressOffset;
  uint32_t destinationAddressBaseL;
  uint32_t destinationAddressBaseH;
  uint32_t destinationAddressOffset;
  uint32_t nonTailDataLength;
  uint32_t tailDataLength;
  uint32_t res9[2];
} rtFftsPlusSdmaCtx_t;
// ffts plus notify record/wait context
// 128-byte FFTS+ context for notify record and notify wait operations;
// notifyIdBase selects the base notify id used by the scheduler.
typedef struct tagFftsPlusNotifyCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t res2;
  uint8_t res3;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res4;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint16_t res5: 15;
  uint16_t atm: 1;
  uint16_t res6;
  // 68-71
  uint32_t res7;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint16_t notifyIdBase;
  uint16_t res8;
  // 80-127
  uint32_t res9[12];
} rtFftsPlusNotifyCtx_t;
// write Value context
// 128-byte FFTS+ context that writes up to four 32-bit values to a target
// address; aw* bit-fields carry the AXI write attributes.
typedef struct tagFftsPlusWriteValueCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t res2;
  uint8_t res3;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res4;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint16_t res5: 15;
  uint16_t atm: 1;
  uint16_t res6;
  // 68-71
  uint32_t res7;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint8_t awSize: 3;
  uint8_t snoop: 1;
  uint8_t res8: 4;
  uint8_t awCache: 4;
  uint8_t awProt: 3;
  uint8_t va: 1;
  uint16_t res9;
  // 80-83
  uint32_t writeAddressBaseL;
  // 84-87
  uint32_t writeAddressBaseH: 17;
  uint32_t res10: 15;
  // 88-91
  uint32_t writeAddressOffset;
  // 92-95
  uint32_t res11;
  // 96-111
  uint32_t writeValue[4]; // write_value_00 -> write_value_03
  // 112-127
  uint32_t res12[4];
} rtFftsPlusWriteValueCtx_t;
// ai cpu context
// 128-byte FFTS+ context for an AICPU kernel: topic routing (topicId /
// subtopicId / groupId), user data words and the task-param offset.
typedef struct tagFftsPlusAiCpuCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t res2;
  uint8_t res3;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res4;
  // 12-63
  uint16_t successorContextID[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint16_t sat: 1;
  uint16_t res5: 14;
  uint16_t atm: 1;
  uint16_t res6;
  // 68-71
  uint16_t sqeIndex;
  uint8_t kernelType: 7;
  uint8_t bm: 1;
  uint8_t topicType: 4;
  uint8_t qos: 3;
  uint8_t res7: 1;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint16_t nonTailBlockdim;
  uint16_t tailBlockdim;
  // 80-115
  uint32_t usrData[9]; // usr_data0 -> usr_data8 usr_data2(task_param_base_l) usr_data3(task_param_base_h)
  // 116-119
  uint32_t res8;
  // 120-123
  uint32_t subtopicId: 12;
  uint32_t topicId: 6;
  uint32_t groupId: 6;
  uint32_t usrDataLength: 8;
  // 124-127
  uint32_t taskParamOffset;
} rtFftsPlusAiCpuCtx_t;
// data context
// 128-byte FFTS+ context for data movement (flush/invalidate/writeback):
// base address plus non-tail/tail inner/outer counts, lengths and strides.
// Note: cntInit/cnt double as consumer or producer counters depending on use.
typedef struct tagFftsPlusDataCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t res2;
  uint8_t res3;
  uint8_t cntInit; // cons_cnt_init / prod_cnt_init
  uint8_t cnt; // cons_cnt / prod_cnt
  // 8-11
  uint32_t res4;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint16_t res5: 15;
  uint16_t atm: 1;
  uint16_t res6;
  // 68-71 (fixed: comment previously said 68-81)
  uint16_t origConsumerCounter;
  uint16_t runConsumerCounter;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint32_t res7;
  // 80-83
  uint32_t addressBaseL;
  // 84-87
  uint32_t addressBaseH;
  // 88-91
  uint32_t addressOffset;
  // 92-95
  uint32_t res8;
  // 96-99
  uint16_t nonTailNumOutter;
  uint16_t nonTailNumInner;
  // 100-103
  uint32_t nonTailLengthInner;
  // 104-107
  uint32_t nonTailStrideOutter;
  // 108-111
  uint32_t nonTailStrideInner;
  // 112-115
  uint16_t tailNumOutter;
  uint16_t tailNumInner;
  // 116-119
  uint32_t tailLengthInner;
  // 120-123
  uint32_t tailStrideOutter;
  // 124-127
  uint32_t tailStrideInner;
} rtFftsPlusDataCtx_t;
// at start context | |||
typedef struct tagFftsPlusAtStartCtx { | |||
// 0-3 bytes | |||
uint8_t hardwareContextType; | |||
uint8_t softwareContextType; | |||
uint8_t successorNum; | |||
uint8_t res1: 7; | |||
uint8_t aten: 1; | |||
// 4-7 | |||
uint8_t res2; | |||
uint8_t res3; | |||
uint8_t predCntInit; | |||
uint8_t predCnt; | |||
// 8-11 | |||
uint32_t res4; | |||
// 12-63 | |||
uint16_t successorList[RT_CTX_SUCCESSOR_NUM]; | |||
// 64-67 | |||
uint16_t res5; | |||
uint16_t res6; | |||
// 68-71 | |||
uint16_t res7; | |||
uint16_t res8; | |||
// 72-75 | |||
uint16_t threadId; | |||
uint16_t threadDim; | |||
// 76-79 | |||
uint16_t threadIdInit; | |||
uint16_t threadWindowSize; | |||
// 80-127 | |||
uint16_t res9[12]; | |||
} rtFftsPlusAtStartCtx_t; | |||
// at end context
// 128-byte FFTS+ context closing an auto-thread region; on completion it
// signals at-start slots and out-label slots instead of a plain successor list.
#define RT_CTX_SUCC_AT_START_SLOT_NUM 12
#define RT_CTX_SUCC_OUT_LABEL_SLOT_NUM 12
typedef struct tagFftsPlusAtEndCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t atStartSlotNumber;
  uint8_t outLabelSlotNumber: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t res1;
  uint8_t res2;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res3;
  // 12-59
  uint16_t succAtStartSlot[RT_CTX_SUCC_AT_START_SLOT_NUM];
  uint16_t succOutLabelSlot[RT_CTX_SUCC_OUT_LABEL_SLOT_NUM];
  // 60-63
  uint16_t res4;
  uint16_t res5;
  // 64-67
  uint16_t res6;
  uint16_t res7;
  // 68-71
  uint16_t res8;
  uint16_t res9;
  // 72-75
  uint16_t threadId;
  uint16_t res10;
  // 76-79
  uint16_t res11;
  uint16_t res12;
  // 80-127
  uint32_t res13[12];
} rtFftsPlusAtEndCtx_t;
// label context
// 128-byte FFTS+ software context acting as a jump target / join point;
// mostly reserved space besides the common header and successor list.
typedef struct tagFftsPlusLabelCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1;
  // 4-7
  uint8_t res2;
  uint8_t res3;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res4;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-79
  uint16_t res5[8];
  // 80-127
  uint32_t res6[12];
} rtFftsPlusLabelCtx_t;
// case switch context
// 128-byte FFTS+ software context for multi-way (case) branching; loads the
// selector from up to two addresses (ld0En/ld1En) and dispatches into the
// label range [startLabelId, startLabelId + labelListLen).
typedef struct tagFftsPlusCaseSwitchCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t startLabelId;
  uint8_t labelListLen;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res2;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint16_t res3: 15;
  uint16_t atm: 1;
  uint16_t res4;
  // 68-71
  uint32_t res5;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint8_t arSize: 3;
  uint8_t snoop: 1;
  uint8_t res6: 4;
  uint8_t arCache: 4;
  uint8_t arProt: 3;
  uint8_t va: 1;
  uint16_t res7;
  // 80-83
  uint32_t loadAddress0BaseL;
  // 84-87
  uint32_t loadAddress0BaseH: 17;
  uint32_t res8: 14;
  uint32_t ld0En: 1;
  // 88-91
  uint32_t loadAddress0Offset;
  // 92-95
  uint32_t res9;
  // 96-99
  uint32_t loadAddress1BaseL;
  // 100-103
  uint32_t loadAddress1BaseH: 17;
  uint32_t res10: 14;
  uint32_t ld1En: 1;
  // 104-107
  uint32_t loadAddress1Offset;
  // 108-127
  uint32_t res11[5];
} rtFftsPlusCaseSwitchCtx_t;
// case default context
// 128-byte FFTS+ software context for the default arm of a case switch;
// shares the startLabelId/labelListLen header with the case-switch context.
typedef struct tagFftsPlusCaseDefCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t successorNum;
  uint8_t res1: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t startLabelId;
  uint8_t labelListLen;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res2;
  // 12-63
  uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  // 64-67
  uint16_t res3;
  uint16_t res4;
  // 68-127
  uint32_t res5[15];
} rtFftsPlusCaseDefCtx_t;
// condition switch context
// 128-byte FFTS+ software context for two-way branching: compares loaded
// value(s) against cmpValue1/cmpValue2 using `condition` (rtFftsPlusCondType_t)
// and follows the true or false successor list.
#define RT_CTX_TRUE_SUCCESSOR_NUM 12
#define RT_CTX_FALSE_SUCCESSOR_NUM 14
typedef struct tagFftsPlusCondSwitchCtx {
  // 0-3 bytes
  uint8_t hardwareContextType;
  uint8_t softwareContextType;
  uint8_t trueSuccessorNum;
  uint8_t falseSuccessorNum: 7;
  uint8_t aten: 1;
  // 4-7
  uint8_t condition;
  uint8_t res1;
  uint8_t predCntInit;
  uint8_t predCnt;
  // 8-11
  uint32_t res2;
  // 12-63
  uint16_t trueSuccessorList[RT_CTX_TRUE_SUCCESSOR_NUM];
  uint16_t falseSuccessorList[RT_CTX_FALSE_SUCCESSOR_NUM];
  // 64-67
  uint16_t res3: 15;
  uint16_t atm: 1;
  uint16_t res4;
  // 68-71
  uint32_t res5;
  // 72-75
  uint16_t threadId;
  uint16_t threadDim;
  // 76-79
  uint8_t arSize: 3;
  uint8_t snoop: 1;
  uint8_t res6: 4;
  uint8_t arCache: 4;
  uint8_t arProt: 3;
  uint8_t va: 1;
  uint16_t res7;
  // 80-83
  uint32_t loadAddress0BaseL;
  // 84-87
  uint32_t loadAddress0BaseH: 17;
  uint32_t res8: 14;
  uint32_t ld0En: 1;
  // 88-91
  uint32_t loadAddress0Offset;
  // 92-95
  uint32_t res9;
  // 96-99
  uint32_t loadAddress1BaseL;
  // 100-103
  uint32_t loadAddress1BaseH: 17;
  uint32_t res10: 14;
  uint32_t ld1En: 1;
  // 104-107
  uint32_t loadAddress1Offset;
  // 108-127
  uint32_t res11[3];
  uint32_t cmpValue1;
  uint32_t cmpValue2;
} rtFftsPlusCondSwitchCtx_t;
#pragma pack(pop) | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_FFTS_PLUS_DEFINE_H |
@@ -53,6 +53,7 @@ typedef enum tagModelTaskType { | |||
RT_MODEL_TASK_ALL_KERNEL, | |||
RT_MODEL_TASK_PROFILER_TRACE_EX, | |||
RT_MODEL_TASK_FFTS_TASK, | |||
RT_MODEL_TASK_FFTS_PLUS_TASK, | |||
} rtModelTaskType_t; | |||
typedef enum tagModelStreamType { | |||
@@ -0,0 +1,97 @@ | |||
/* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||
* Description: the definition of stars | |||
*/ | |||
#ifndef __CCE_RUNTIME_STARS_DEFINE__H | |||
#define __CCE_RUNTIME_STARS_DEFINE__H | |||
#include "base.h" | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
extern "C" { | |||
#endif | |||
#pragma pack(push) | |||
#pragma pack (1) | |||
// Common 8-byte header shared by STARS submission queue entries; the two
// leading bytes are flag bit-fields, followed by block dim, stream and task id.
typedef struct tagStarsSqeHeader {
  uint8_t type: 6;
  uint8_t l1Lock: 1;
  uint8_t l1Unlock: 1;
  uint8_t ie: 2;
  uint8_t preP: 2;
  uint8_t postP: 2;
  uint8_t wrCqe: 1;
  uint8_t reserved: 1;
  uint16_t blockDim;
  uint16_t rtStreamId;
  uint16_t taskId;
} rtStarsSqeHeader_t;
// ffts+ type
// fftsType value carried in the FFTS+ SQE; values 2 and 3 are reserved.
typedef enum tagFftsPlusType {
  RT_FFTS_PLUS_TYPE_RES1 = 2, // Reserved
  RT_FFTS_PLUS_TYPE_RES2 = 3, // Reserved
  RT_FFTS_PLUS_TYPE = 4, // FFTS+ mode
} rtFftsPlusType_t;
// ffts+ sqe
// 64-byte FFTS+ submission queue entry: STARS header, context counts, the
// stack physical base and the base address of the context buffer.
// NOTE(review): Reserved12/13/14 break the lower-case "reservedN" naming of
// the other fields; renaming would change the public struct so it is kept.
typedef struct tagFftsPlusSqe {
  // 0-7 bytes
  rtStarsSqeHeader_t sqeHeader;
  // 8-11 bytes
  uint16_t fftsType: 3;
  uint16_t reserved1: 13;
  uint16_t reserved2;
  // 12-15 bytes
  uint16_t pmg: 2;
  uint16_t ns: 1;
  uint16_t partId: 8;
  uint16_t reserved3: 1;
  uint16_t qos: 4;
  uint8_t kernelCredit;
  uint8_t reserved4;
  // 16-23 bytes
  uint32_t stackPhyBaseL;
  uint32_t stackPhyBaseH;
  // 24-31 bytes
  uint16_t totalContextNum;
  uint16_t readyContextNum;
  uint16_t preloadContextNum;
  uint16_t reserved5;
  // 32-35 bytes
  uint16_t reserved6: 8;
  uint16_t reserved7: 4;
  uint16_t dsplitUnit: 3;
  uint16_t reserved8: 1;
  uint16_t prefetchOstNum: 5;
  uint16_t reserved9: 3;
  uint16_t cmaintOstNum: 5;
  uint16_t reserved10: 3;
  // 36-39 bytes
  uint16_t aicPrefetchLower: 5;
  uint16_t reserved11: 3;
  uint16_t aicPrefetchUpper: 5;
  uint16_t Reserved12: 3;
  uint16_t aivPrefetchLower: 5;
  uint16_t Reserved13: 3;
  uint16_t aivPrefetchUpper: 5;
  uint16_t Reserved14: 3;
  // 40-47 bytes
  uint32_t contextAddressBaseL;
  uint32_t contextAddressBaseH:17;
  uint32_t reserved15:15;
  // 48-63 bytes
  uint32_t reserved16[4];
} rtFftsPlusSqe_t;
#pragma pack(pop) | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_STARS_DEFINE__H |