@@ -66,6 +66,7 @@ void GetGeTensorDescFromDomiInfo(std::vector<ge::TensorDesc> &ge_descs, | |||||
ge::Shape ge_shape(shape_dims); | ge::Shape ge_shape(shape_dims); | ||||
ge_desc.SetShape(ge_shape); | ge_desc.SetShape(ge_shape); | ||||
ge_desc.SetSize(desc_item.size); | ge_desc.SetSize(desc_item.size); | ||||
ge_desc.SetShapeRange(desc_item.shape_info.shape_ranges); | |||||
ge_descs.emplace_back(ge_desc); | ge_descs.emplace_back(ge_desc); | ||||
++idx; | ++idx; | ||||
} | } | ||||
@@ -19,6 +19,10 @@ | |||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
namespace ge { | namespace ge { | ||||
namespace { | |||||
// Name of the bool attribute that MarkGraphUnknownStatusPass::Run stamps on every direct node of a graph.
// NOTE(review): the same literal is defined again in the other files touched by this change
// (hybrid model builder, node executor) -- keep the spellings in sync or hoist it to a shared header.
const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; | |||||
} | |||||
Status MarkGraphUnknownStatusPass::Run(ComputeGraphPtr graph) { | Status MarkGraphUnknownStatusPass::Run(ComputeGraphPtr graph) { | ||||
GE_CHECK_NOTNULL(graph); | GE_CHECK_NOTNULL(graph); | ||||
bool is_unknown_shape = false; | bool is_unknown_shape = false; | ||||
@@ -35,6 +39,11 @@ Status MarkGraphUnknownStatusPass::Run(ComputeGraphPtr graph) { | |||||
break; | break; | ||||
} | } | ||||
} | } | ||||
for (const auto &node : graph->GetDirectNode()) { | |||||
GELOGD("Set OwnerGraphIsUnknown attr to node[%s]", node->GetName().c_str()); | |||||
(void)AttrUtils::SetBool(node->GetOpDesc(), kOwnerGraphIsUnknown, is_unknown_shape); | |||||
} | |||||
graph->SetGraphUnknownFlag(is_unknown_shape); | graph->SetGraphUnknownFlag(is_unknown_shape); | ||||
GELOGD("mark graph [%s] unknown status success! value is %d", graph->GetName().c_str(), is_unknown_shape); | GELOGD("mark graph [%s] unknown status success! value is %d", graph->GetName().c_str(), is_unknown_shape); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -58,7 +58,7 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr<ModelListener> &lis | |||||
run_flag_ = true; | run_flag_ = true; | ||||
listener_ = listener; | listener_ = listener; | ||||
future_ = std::async([&]() -> Status { | |||||
future_ = std::async(std::launch::async, [&]() -> Status { | |||||
GetContext().SetSessionId(executor_->GetContext()->session_id); | GetContext().SetSessionId(executor_->GetContext()->session_id); | ||||
return RunInternal(); | return RunInternal(); | ||||
}); | }); | ||||
@@ -72,7 +72,11 @@ Status HybridModelAsyncExecutor::Stop() { | |||||
std::lock_guard<std::mutex> lk(mu_); | std::lock_guard<std::mutex> lk(mu_); | ||||
run_flag_ = false; | run_flag_ = false; | ||||
data_inputer_->Stop(); | data_inputer_->Stop(); | ||||
auto ret = future_.get(); | |||||
Status ret = SUCCESS; | |||||
if (future_.valid()) { | |||||
ret = future_.get(); | |||||
} | |||||
if (stream_ != nullptr) { | if (stream_ != nullptr) { | ||||
GE_CHK_RT(rtStreamDestroy(stream_)); | GE_CHK_RT(rtStreamDestroy(stream_)); | ||||
@@ -49,7 +49,7 @@ Status CallbackManager::RegisterCallback(rtCallback_t callback, void *user_data) | |||||
Status CallbackManager::Init() { | Status CallbackManager::Init() { | ||||
rtContext_t ctx = nullptr; | rtContext_t ctx = nullptr; | ||||
GE_CHK_RT_RET(rtCtxGetCurrent(&ctx)); | GE_CHK_RT_RET(rtCtxGetCurrent(&ctx)); | ||||
ret_future_ = std::async([&](rtContext_t context) ->Status { | |||||
ret_future_ = std::async(std::launch::async, [&](rtContext_t context) ->Status { | |||||
return CallbackProcess(context); | return CallbackProcess(context); | ||||
}, ctx); | }, ctx); | ||||
if (!ret_future_.valid()) { | if (!ret_future_.valid()) { | ||||
@@ -307,7 +307,7 @@ Status SubgraphExecutor::LaunchTasks() { | |||||
Status SubgraphExecutor::ScheduleTasks() { | Status SubgraphExecutor::ScheduleTasks() { | ||||
GELOGD("[%s] Start to schedule prepare workers.", graph_item_->GetName().c_str()); | GELOGD("[%s] Start to schedule prepare workers.", graph_item_->GetName().c_str()); | ||||
auto prepare_future = std::async([&]() -> Status { | |||||
auto prepare_future = std::async(std::launch::async, [&]() -> Status { | |||||
GetContext().SetSessionId(context_->session_id); | GetContext().SetSessionId(context_->session_id); | ||||
auto ret = PrepareNodes(); | auto ret = PrepareNodes(); | ||||
ready_queue_.Push(nullptr); | ready_queue_.Push(nullptr); | ||||
@@ -62,7 +62,7 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { | |||||
{ | { | ||||
std::lock_guard<std::mutex> lk(mu_); | std::lock_guard<std::mutex> lk(mu_); | ||||
RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); | RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); | ||||
GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndType(node_item.node), "Invoke InferShapeAndType failed."); | |||||
GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), "Invoke InferShapeAndType failed."); | |||||
RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End"); | RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End"); | ||||
} | } | ||||
// Check again to make sure shape is valid after shape inference | // Check again to make sure shape is valid after shape inference | ||||
@@ -37,6 +37,16 @@ const uint32_t kSubgraphIndex = 0U; | |||||
const uint32_t kVarOutputIndex = 0U; | const uint32_t kVarOutputIndex = 0U; | ||||
const uint32_t kAlignment = 32; | const uint32_t kAlignment = 32; | ||||
const int kBytes = 8; | const int kBytes = 8; | ||||
const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; | |||||
/// Tells whether `graph` was marked as an unknown-shape graph.
///
/// The answer is read from the kOwnerGraphIsUnknown bool attribute of the first usable
/// direct node. MarkGraphUnknownStatusPass writes the same value onto every direct node
/// of a graph, so a single node is representative; there is no need to scan all of them.
///
/// @param graph  Graph whose direct nodes are probed.
/// @return The attribute value of the first non-null direct node; false when the graph
///         has no such node. The GetBool result is deliberately ignored, so a node
///         without the attribute is expected to leave the flag at its default (false).
bool IsGraphUnknown(ComputeGraph &graph) {
  for (const auto &node : graph.GetDirectNode()) {
    if (node == nullptr) {
      // Defensive: never dereference a null entry, fall through to the next node instead.
      continue;
    }
    bool is_unknown_shape = false;
    (void)AttrUtils::GetBool(node->GetOpDesc(), kOwnerGraphIsUnknown, is_unknown_shape);
    // Intentional early return: the first valid node decides for the whole graph.
    return is_unknown_shape;
  }
  return false;
}
int64_t CalcVarSizeInBytes(const GeTensorDesc &desc) { | int64_t CalcVarSizeInBytes(const GeTensorDesc &desc) { | ||||
int64_t var_size = 0; | int64_t var_size = 0; | ||||
@@ -556,7 +566,7 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGrap | |||||
auto subgraph = NodeUtils::GetSubgraph(*node, kSubgraphIndex); | auto subgraph = NodeUtils::GetSubgraph(*node, kSubgraphIndex); | ||||
GE_CHECK_NOTNULL(subgraph); | GE_CHECK_NOTNULL(subgraph); | ||||
bool is_unknown_shape = subgraph->GetGraphUnknownFlag(); | |||||
bool is_unknown_shape = IsGraphUnknown(*subgraph); | |||||
if (!is_unknown_shape) { | if (!is_unknown_shape) { | ||||
merged_graph->AddNode(node); | merged_graph->AddNode(node); | ||||
GELOGD("[%s] Known shape partitioned call added to merged graph.", op_desc->GetName().c_str()); | GELOGD("[%s] Known shape partitioned call added to merged graph.", op_desc->GetName().c_str()); | ||||
@@ -603,7 +613,7 @@ Status HybridModelBuilder::UnfoldSubgraph(ComputeGraph &root_graph, | |||||
if (sub_op_type == PARTITIONEDCALL) { | if (sub_op_type == PARTITIONEDCALL) { | ||||
auto sub_sub_graph = NodeUtils::GetSubgraph(*sub_node, kSubgraphIndex); | auto sub_sub_graph = NodeUtils::GetSubgraph(*sub_node, kSubgraphIndex); | ||||
GE_CHECK_NOTNULL(sub_sub_graph); | GE_CHECK_NOTNULL(sub_sub_graph); | ||||
if (sub_sub_graph->GetGraphUnknownFlag()) { | |||||
if (IsGraphUnknown(*sub_sub_graph)) { | |||||
GE_CHK_STATUS_RET(UnfoldSubgraph(root_graph, parent_graph, *sub_sub_graph), | GE_CHK_STATUS_RET(UnfoldSubgraph(root_graph, parent_graph, *sub_sub_graph), | ||||
"[%s] Failed to merge subgraph", | "[%s] Failed to merge subgraph", | ||||
sub_sub_graph->GetName().c_str()); | sub_sub_graph->GetName().c_str()); | ||||
@@ -693,7 +703,7 @@ Status HybridModelBuilder::LoadGraph() { | |||||
continue; | continue; | ||||
} | } | ||||
if (sub_graph->GetGraphUnknownFlag()) { | |||||
if (IsGraphUnknown(*sub_graph)) { | |||||
GE_CHK_STATUS_RET(LoadDynamicSubgraph(*sub_graph, false), | GE_CHK_STATUS_RET(LoadDynamicSubgraph(*sub_graph, false), | ||||
"Failed to load subgraph: [%s]", | "Failed to load subgraph: [%s]", | ||||
sub_graph->GetName().c_str()); | sub_graph->GetName().c_str()); | ||||
@@ -947,7 +957,7 @@ Status HybridModelBuilder::IndexTaskDefs() { | |||||
continue; | continue; | ||||
} | } | ||||
bool is_unknown_shape = sub_graph->GetGraphUnknownFlag(); | |||||
bool is_unknown_shape = IsGraphUnknown(*sub_graph); | |||||
if (!is_unknown_shape) { | if (!is_unknown_shape) { | ||||
GE_CHK_STATUS_RET_NOLOG(LoadGeModel(*sub_graph, ge_model)); | GE_CHK_STATUS_RET_NOLOG(LoadGeModel(*sub_graph, ge_model)); | ||||
continue; | continue; | ||||
@@ -244,6 +244,9 @@ void TaskCompilerFactory::Register(CreateFn fn) { | |||||
} | } | ||||
std::unique_ptr<TaskCompiler> TaskCompilerFactory::GetTaskCompiler() { | std::unique_ptr<TaskCompiler> TaskCompilerFactory::GetTaskCompiler() { | ||||
if (compiler_func_ == nullptr) { | |||||
return nullptr; | |||||
} | |||||
auto compiler_instance = std::unique_ptr<TaskCompiler>(compiler_func_()); | auto compiler_instance = std::unique_ptr<TaskCompiler>(compiler_func_()); | ||||
return compiler_instance; | return compiler_instance; | ||||
} | } | ||||
@@ -18,6 +18,7 @@ | |||||
#include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
#include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
#include "init/gelib.h" | #include "init/gelib.h" | ||||
#include "graph/utils/tensor_utils.h" | |||||
#include "hybrid/model/hybrid_model.h" | #include "hybrid/model/hybrid_model.h" | ||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
#include "opskernel_manager/ops_kernel_builder_manager.h" | #include "opskernel_manager/ops_kernel_builder_manager.h" | ||||
@@ -32,6 +33,7 @@ const char *const kEngineNameAiCpuTf = "aicpu_tf_kernel"; | |||||
const char *const kEngineNameHccl = "ops_kernel_info_hccl"; | const char *const kEngineNameHccl = "ops_kernel_info_hccl"; | ||||
const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE"; | const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE"; | ||||
const char *const kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; | const char *const kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; | ||||
const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown"; | |||||
} | } | ||||
Status NodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { | Status NodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { | ||||
GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); | GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); | ||||
@@ -80,16 +82,17 @@ NodeExecutorManager::ExecutorType NodeExecutorManager::ResolveExecutorType(Node | |||||
auto op_type = node.GetType(); | auto op_type = node.GetType(); | ||||
if (op_type == PARTITIONEDCALL) { | if (op_type == PARTITIONEDCALL) { | ||||
const auto &subgraph = NodeUtils::GetSubgraph(node, 0); | const auto &subgraph = NodeUtils::GetSubgraph(node, 0); | ||||
if (subgraph != nullptr && subgraph->GetGraphUnknownFlag()) { | |||||
GELOGD("node %s was marked as unknown shape in node_executor.", node.GetName().c_str()); | |||||
return ExecutorType::DYNAMIC_SUBGRAPH; | |||||
} | |||||
bool is_dynamic = false; | |||||
(void) NodeUtils::GetNodeUnknownShapeStatus(node, is_dynamic); | |||||
if (is_dynamic) { | |||||
return ExecutorType::DYNAMIC_SUBGRAPH; | |||||
if (subgraph != nullptr) { | |||||
for (const auto &node : subgraph->GetDirectNode()) { | |||||
bool is_unknown_shape = false; | |||||
(void)AttrUtils::GetBool(node->GetOpDesc(), kOwnerGraphIsUnknown, is_unknown_shape); | |||||
if (is_unknown_shape) { | |||||
return ExecutorType::DYNAMIC_SUBGRAPH; | |||||
} else { | |||||
return ExecutorType::COMPILED_SUBGRAPH; | |||||
} | |||||
} | |||||
} | } | ||||
return ExecutorType::COMPILED_SUBGRAPH; | |||||
} | } | ||||
// rts kernel store is assigned to NetOutput | // rts kernel store is assigned to NetOutput | ||||
@@ -28,16 +28,9 @@ | |||||
#include "external/ge/ge_api_types.h" | #include "external/ge/ge_api_types.h" | ||||
namespace ge { | namespace ge { | ||||
enum RuntimeType { | |||||
HOST = 0, | |||||
DEVICE = 1 | |||||
}; | |||||
enum RuntimeType { HOST = 0, DEVICE = 1 }; | |||||
enum PerfLevel { | |||||
GEN_TASK_WITH_FUSION = -1, | |||||
GEN_TASK_WITHOUT_L2FUSION = 3, | |||||
GEN_TASK_WITHOUT_FUSION = 4 | |||||
}; | |||||
enum PerfLevel { GEN_TASK_WITH_FUSION = -1, GEN_TASK_WITHOUT_L2FUSION = 3, GEN_TASK_WITHOUT_FUSION = 4 }; | |||||
enum FrameworkType { | enum FrameworkType { | ||||
CAFFE = 0, | CAFFE = 0, | ||||
@@ -55,12 +48,7 @@ enum OpEngineType { | |||||
ENGINE_AIVECTOR = 4 // not support | ENGINE_AIVECTOR = 4 // not support | ||||
}; | }; | ||||
enum InputAippType{ | |||||
DATA_WITHOUT_AIPP = 0, | |||||
DATA_WITH_STATIC_AIPP, | |||||
DATA_WITH_DYNAMIC_AIPP, | |||||
DYNAMIC_AIPP_NODE | |||||
}; | |||||
enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYNAMIC_AIPP, DYNAMIC_AIPP_NODE }; | |||||
const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | const char *const GE_ENGINE_ATTR_MEM_TYPE_HBM = "HBM"; | ||||
const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | const char *const GE_OPTION_EXEC_PLACEMENT = "ge.exec.placement"; | ||||
@@ -107,7 +95,7 @@ struct OutputData { | |||||
struct Command { | struct Command { | ||||
std::string cmd_type; // Command type | std::string cmd_type; // Command type | ||||
std::vector<std::string> cmd_params; // Command params | std::vector<std::string> cmd_params; // Command params | ||||
uint64_t module_index; // prof module | |||||
uint64_t module_index; // prof module | |||||
}; | }; | ||||
// The definition of I/O shape description | // The definition of I/O shape description | ||||
@@ -117,6 +105,7 @@ struct ShapeDescription { | |||||
int64_t height = 0; | int64_t height = 0; | ||||
int64_t width = 0; | int64_t width = 0; | ||||
std::vector<int64_t> dims; | std::vector<int64_t> dims; | ||||
std::vector<std::pair<int64_t, int64_t>> shape_ranges; | |||||
}; | }; | ||||
// Definition of input and output description information | // Definition of input and output description information | ||||
@@ -1 +1 @@ | |||||
Subproject commit ba04e25e878af2ac5f9a697806daee0768ae3bad | |||||
Subproject commit 1b09ed04b6dd22d1aed1bee92fd42736c0fafc65 |
@@ -1 +1 @@ | |||||
Subproject commit 308e3587ec54fdd32ed7113d64a1335208701f59 | |||||
Subproject commit 3d49906d119b1cc01f4256d7992759ce9f3dcfcd |