Merge pull request !1899 from 赵之轩/my_dev4tags/v1.5.1
@@ -214,7 +214,7 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> | |||||
context.SetTaskId(task_id); | context.SetTaskId(task_id); | ||||
context.SetStreamId(stream_id); | context.SetStreamId(stream_id); | ||||
GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | ||||
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim()); | |||||
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicore, (*it)->GetBlockDim(), (*it)->GetOpType()); | |||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); | ||||
} | } | ||||
@@ -33,6 +33,7 @@ namespace { | |||||
constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; | ||||
constexpr char const *kAttrOpParamSize = "op_para_size"; | constexpr char const *kAttrOpParamSize = "op_para_size"; | ||||
constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | ||||
const string kAtomicOpType = "DynamicAtomicAddrClean"; | |||||
std::atomic<std::uint64_t> log_id(0); | std::atomic<std::uint64_t> log_id(0); | ||||
} // namespace | } // namespace | ||||
@@ -51,6 +52,7 @@ bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) { | |||||
} | } | ||||
Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | ||||
op_type_ = op_desc.GetType(); | |||||
log_name_ = op_desc.GetName() + "_tvmbin"; | log_name_ = op_desc.GetName() + "_tvmbin"; | ||||
log_id_ = log_id++; | log_id_ = log_id++; | ||||
auto op_desc_ptr = MakeShared<OpDesc>(op_desc); | auto op_desc_ptr = MakeShared<OpDesc>(op_desc); | ||||
@@ -538,6 +540,10 @@ const std::string &AiCoreOpTask::GetName() const { | |||||
return stub_name_; | return stub_name_; | ||||
} | } | ||||
const std::string &AiCoreOpTask::GetOpType() const { | |||||
return op_type_; | |||||
} | |||||
std::string AiCoreOpTask::GetKeyForOpParamSize() const { | std::string AiCoreOpTask::GetKeyForOpParamSize() const { | ||||
return kAttrOpParamSize; | return kAttrOpParamSize; | ||||
} | } | ||||
@@ -631,6 +637,10 @@ std::string AtomicAddrCleanOpTask::GetKeyForKernelName(const OpDesc &op_desc) co | |||||
return op_desc.GetName() + "_atomic_kernelname"; | return op_desc.GetName() + "_atomic_kernelname"; | ||||
} | } | ||||
const std::string &AtomicAddrCleanOpTask::GetOpType() const { | |||||
return kAtomicOpType; | |||||
} | |||||
Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { | Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { | ||||
GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); | GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); | ||||
GE_CHK_STATUS_RET(optiling::OpAtomicCalculateV2(*node, tiling_info), | GE_CHK_STATUS_RET(optiling::OpAtomicCalculateV2(*node, tiling_info), | ||||
@@ -80,6 +80,8 @@ class AiCoreOpTask { | |||||
void SetSingleOp(bool is_single_op) {is_single_op_ = is_single_op;}; | void SetSingleOp(bool is_single_op) {is_single_op_ = is_single_op;}; | ||||
virtual const std::string& GetOpType() const; | |||||
protected: | protected: | ||||
Status UpdateTilingInfo(TaskContext &context); | Status UpdateTilingInfo(TaskContext &context); | ||||
virtual std::string GetKeyForOpParamSize() const; | virtual std::string GetKeyForOpParamSize() const; | ||||
@@ -119,12 +121,14 @@ class AiCoreOpTask { | |||||
uint64_t log_id_ = 0; | uint64_t log_id_ = 0; | ||||
std::string log_name_; | std::string log_name_; | ||||
uint32_t offset_ = 0; | uint32_t offset_ = 0; | ||||
std::string op_type_; | |||||
}; | }; | ||||
class AtomicAddrCleanOpTask : public AiCoreOpTask { | class AtomicAddrCleanOpTask : public AiCoreOpTask { | ||||
public: | public: | ||||
Status Init(const OpDesc &op_desc, const domi::TaskDef &task_def) override; | Status Init(const OpDesc &op_desc, const domi::TaskDef &task_def) override; | ||||
Status UpdateArgs(TaskContext &task_context) override; | Status UpdateArgs(TaskContext &task_context) override; | ||||
const std::string& GetOpType() const override; | |||||
protected: | protected: | ||||
std::string GetKeyForOpParamSize() const override; | std::string GetKeyForOpParamSize() const override; | ||||
@@ -207,7 +207,7 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void( | |||||
context.SetTaskId(task_id); | context.SetTaskId(task_id); | ||||
context.SetStreamId(stream_id); | context.SetStreamId(stream_id); | ||||
GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id); | ||||
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0); | |||||
(void)context.SaveProfilingTaskDescInfo(task_id, stream_id, kTaskTypeAicpu, 0, node_type_); | |||||
auto callback = [=, &context]() { | auto callback = [=, &context]() { | ||||
GELOGD("Node[%s] callback start.", node_name_.c_str()); | GELOGD("Node[%s] callback start.", node_name_.c_str()); | ||||
RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); | ||||
@@ -571,8 +571,8 @@ Status TaskContext::Synchronize() { | |||||
return execution_context_->Synchronize(GetStream()); | return execution_context_->Synchronize(GetStream()); | ||||
} | } | ||||
Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||||
const std::string &task_type, uint32_t block_dim) { | |||||
Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type, | |||||
uint32_t block_dim, const std::string &op_type) { | |||||
if (ProfilingManager::Instance().ProfilingModelLoadOn()) { | if (ProfilingManager::Instance().ProfilingModelLoadOn()) { | ||||
const NodeItem &node_item = GetNodeItem(); | const NodeItem &node_item = GetNodeItem(); | ||||
auto op_desc = node_item.GetOpDesc(); | auto op_desc = node_item.GetOpDesc(); | ||||
@@ -586,7 +586,7 @@ Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream | |||||
TaskDescInfo tmp_task_desc_info; | TaskDescInfo tmp_task_desc_info; | ||||
tmp_task_desc_info.model_name = dynamic_model_name; | tmp_task_desc_info.model_name = dynamic_model_name; | ||||
tmp_task_desc_info.op_name = op_desc->GetName(); | tmp_task_desc_info.op_name = op_desc->GetName(); | ||||
tmp_task_desc_info.op_type = op_desc->GetType(); | |||||
tmp_task_desc_info.op_type = op_type; | |||||
tmp_task_desc_info.block_dim = block_dim; | tmp_task_desc_info.block_dim = block_dim; | ||||
tmp_task_desc_info.task_type = task_type; | tmp_task_desc_info.task_type = task_type; | ||||
tmp_task_desc_info.task_id = task_id; | tmp_task_desc_info.task_id = task_id; | ||||
@@ -118,8 +118,8 @@ class TaskContext { | |||||
void *handle_ = nullptr; | void *handle_ = nullptr; | ||||
const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | const std::vector<TaskDescInfo>& GetProfilingTaskDescInfo() const { return task_desc_info; } | ||||
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, | |||||
const std::string &task_type, uint32_t block_dim); | |||||
Status SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type, | |||||
uint32_t block_dim, const std::string &op_type); | |||||
void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | void ClearProfilingTaskDescInfo() { task_desc_info.clear(); } | ||||
private: | private: | ||||
@@ -119,7 +119,7 @@ TEST_F(UtestExecutionEngine, ExecuteAsync_without_callback_and_kernel_task) { | |||||
uint32_t stream_id = 1; | uint32_t stream_id = 1; | ||||
std::string task_type = "rts"; | std::string task_type = "rts"; | ||||
uint32_t block_dim = 0; | uint32_t block_dim = 0; | ||||
node_state->GetTaskContext()->SaveProfilingTaskDescInfo(task_id, stream_id, task_type, block_dim); | |||||
node_state->GetTaskContext()->SaveProfilingTaskDescInfo(task_id, stream_id, task_type, block_dim, op_desc->GetType()); | |||||
ASSERT_TRUE(node_state->GetTaskContext() != nullptr); | ASSERT_TRUE(node_state->GetTaskContext() != nullptr); | ||||
@@ -102,7 +102,7 @@ TEST_F(UtestGeHybrid, aicore_op_task_init_success) { | |||||
op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); | op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); | ||||
std::string kernel_name("kernel/Add"); | std::string kernel_name("kernel/Add"); | ||||
AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); | AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); | ||||
ASSERT_EQ(aicore_task->InitWithTaskDef(*op_desc.get(), task_def), SUCCESS); | |||||
ASSERT_EQ(aicore_task->Init(*op_desc.get(), task_def), SUCCESS); | |||||
rtStream_t stream = nullptr; | rtStream_t stream = nullptr; | ||||
rtStreamCreate(&stream, 0); | rtStreamCreate(&stream, 0); | ||||
ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); | ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); | ||||
@@ -678,6 +678,15 @@ TEST_F(UtestGeHybrid, test_key_for_kernel_bin) { | |||||
EXPECT_EQ(atomic_task->GetKeyForKernelName(op_desc), "Sum_atomic_kernelname"); | EXPECT_EQ(atomic_task->GetKeyForKernelName(op_desc), "Sum_atomic_kernelname"); | ||||
} | } | ||||
TEST_F(UtestGeHybrid, test_op_type) { | |||||
auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask()); | |||||
aicore_task->op_type_ = "Add"; | |||||
EXPECT_EQ(aicore_task->GetOpType(), "Add"); | |||||
auto atomic_task = std::unique_ptr<hybrid::AtomicAddrCleanOpTask>(new(std::nothrow)hybrid::AtomicAddrCleanOpTask()); | |||||
EXPECT_EQ(atomic_task->GetOpType(), "DynamicAtomicAddrClean"); | |||||
} | |||||
TEST_F(UtestGeHybrid, TestParseDependentInputNodesForHccl) { | TEST_F(UtestGeHybrid, TestParseDependentInputNodesForHccl) { | ||||
NodeExecutorManager::GetInstance().engine_mapping_.emplace("ops_kernel_info_hccl", | NodeExecutorManager::GetInstance().engine_mapping_.emplace("ops_kernel_info_hccl", | ||||
NodeExecutorManager::ExecutorType::HCCL); | NodeExecutorManager::ExecutorType::HCCL); | ||||