From 207bf69c20a5953ae01499434922244161e67206 Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Thu, 15 Jul 2021 20:01:58 +0800 Subject: [PATCH 1/5] bugfix for taskdef's random variation in offline case --- ge/graph/build/task_generator.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index 7bb2e2f6..1adcd0aa 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -50,6 +50,7 @@ const char *const kIsInputVar = "INPUT_IS_VAR"; const char *const kIsOutputVar = "OUTPUT_IS_VAR"; const char *const kProfilingMode = "PROFILING_MODE"; const char *const kIteratorV2 = "IteratorV2"; +const char *const kKernelInfoNameHccl = "ops_kernel_info_hccl"; const uint32_t kProfilingArStep = 2; const uint64_t kProfilingFpStartLogid = 1; const uint64_t kProfilingBpEndLogid = 2; @@ -437,14 +438,15 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra } // Reset stream id to ge stream id, as graph load must use ge stream to reassign stream - void *ops_kernel_info_store_ptr = kernel_info_store.get(); for (size_t idx = task_list_size_before; idx < task_list_size_after; ++idx) { task_def_list[idx].set_stream_id(static_cast(stream_id)); op_name_map[idx] = name; - // Set opsKernelInfoStorePtr and op_index, the two fields be use in DistributeTask and InitTaskInfo TaskDef *task_def_ptr = &task_def_list[idx]; GE_CHECK_NOTNULL(task_def_ptr); - task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast(ops_kernel_info_store_ptr)); + // Set opsKernelInfoStorePtr for hccl which will be use in DistributeTask and InitTaskInfo + if (op_kernel_lib_name == kKernelInfoNameHccl) { + task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast(kernel_info_store.get())); + } } GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task finished, generate %zu task(s).", op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id, From 051d0e9fab55a2530b364ecea1e98c1705e308de Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Thu, 15 Jul 2021 20:43:09 +0800 Subject: [PATCH 2/5] Fix bug of single_op. --- ge/single_op/single_op.cc | 4 +++- ge/single_op/task/op_task.cc | 25 ++++++++++++++++++++---- ge/single_op/task/op_task.h | 5 +++-- ge/single_op/task/tbe_task_builder.cc | 2 +- tests/ut/ge/single_op/single_op_task_unittest.cc | 20 +++++++++++++++++++ 5 files changed, 48 insertions(+), 8 deletions(-) diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index a82c30ba..23f4cfad 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -433,11 +433,13 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, if (!inputs_size.empty()) { StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_); GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource, stream_, inputs_size, update_buffers)); - GE_CHK_STATUS_RET_NOLOG(SetHostTensorValue(input_desc, input_buffers)); } if (hybrid_model_executor_ != nullptr) { GELOGD("Execute multi-task dynamic single op by hybrid model executor"); + if (!inputs_size.empty()) { + GE_CHK_STATUS_RET_NOLOG(SetHostTensorValue(input_desc, input_buffers)); + } hybrid::HybridModelExecutor::ExecuteArgs args; GE_CHK_STATUS_RET_NOLOG(InitHybridModelArgs(update_buffers, output_buffers, input_desc, args)); diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index dbc90ac5..fd6639a5 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -294,16 +294,15 @@ Status TbeOpTask::UpdateNodeByShape(const vector &input_desc, cons Status TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size) { if (tiling_buffer != nullptr) { - uintptr_t *arg_base = nullptr; - size_t arg_num = 0; - GetIoAddr(arg_base, arg_num); + uintptr_t *arg_base = reinterpret_cast(args_.get()); + size_t arg_num = arg_size_ / sizeof(void *); GE_CHECK_NOTNULL(node); GE_CHECK_NOTNULL(node->GetOpDesc()); uint32_t inputs_num = node->GetOpDesc()->GetInputsSize(); uint32_t outputs_num = node->GetOpDesc()->GetOutputsSize(); uint32_t workspace_nums = node->GetOpDesc()->GetWorkspace().size(); uint32_t tiling_index = inputs_num + outputs_num + workspace_nums; - if (arg_num == 0 || arg_num < tiling_index) { + if (arg_num == 0 || arg_num <= tiling_index) { GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Size]Tiling index %u, arg number %zu is invalid.", tiling_index, arg_num); return ACL_ERROR_GE_INTERNAL_ERROR; @@ -481,6 +480,24 @@ void TbeOpTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { } } +Status AtomicAddrCleanOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size) { + if (tiling_buffer != nullptr) { + uintptr_t *arg_base = reinterpret_cast(args_.get()); + size_t arg_num = arg_size_ / sizeof(void *); + uint32_t tiling_index = atomic_output_indices_.size(); + if (arg_num == 0 || arg_num <= tiling_index) { + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Size]Tiling index %u, arg number %zu is invalid.", + tiling_index, arg_num); + return ACL_ERROR_GE_INTERNAL_ERROR; + } + arg_base[tiling_index] = reinterpret_cast(tiling_buffer); + } + node_ = node; + tiling_buffer_ = tiling_buffer; + max_tiling_size_ = max_tiling_size; + return SUCCESS; +} + Status AtomicAddrCleanOpTask::UpdateNodeByShape(const vector &input_desc, const vector &output_desc) { return SUCCESS; diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index 132672b0..4a839389 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -97,7 +97,7 @@ class TbeOpTask : public OpTask { const void *GetArgs() const; size_t GetArgSize() const; const std::string &GetStubName() const; - Status EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size); + virtual Status EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size); const std::string &GetTaskType() const override; void SetHandle(void *handle); @@ -149,6 +149,7 @@ class TbeOpTask : public OpTask { class AtomicAddrCleanOpTask : public TbeOpTask { public: Status InitAtomicAddrCleanIndices(); + Status EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size) override; private: Status UpdateNodeByShape(const vector &input_desc, @@ -156,8 +157,8 @@ class AtomicAddrCleanOpTask : public TbeOpTask { Status UpdateIoAddr(const vector &inputs, const vector &outputs) override; Status UpdateTilingArgs(rtStream_t stream) override; Status CalcTilingInfo(optiling::utils::OpRunInfo &run_info) override; - std::vector atomic_output_indices_; + std::vector atomic_output_indices_; }; class AiCpuBaseTask : public OpTask { diff --git a/ge/single_op/task/tbe_task_builder.cc b/ge/single_op/task/tbe_task_builder.cc index 017dac25..f947ca57 100644 --- a/ge/single_op/task/tbe_task_builder.cc +++ b/ge/single_op/task/tbe_task_builder.cc @@ -425,7 +425,7 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); } - task.EnableDynamicSupport(node_, tiling_buffer, static_cast(max_size)); + GE_CHK_STATUS_RET_NOLOG(task.EnableDynamicSupport(node_, tiling_buffer, static_cast(max_size))); return SUCCESS; } diff --git a/tests/ut/ge/single_op/single_op_task_unittest.cc b/tests/ut/ge/single_op/single_op_task_unittest.cc index 8964df74..5960fbbc 100644 --- a/tests/ut/ge/single_op/single_op_task_unittest.cc +++ b/tests/ut/ge/single_op/single_op_task_unittest.cc @@ -237,3 +237,23 @@ TEST_F(UtestSingleOpTask, test_aicpu_task_update_io_addr) { ASSERT_EQ(ret, PARAM_INVALID); } } + +TEST_F(UtestSingleOpTask, test_dynamic_support) { + auto graph = make_shared("graph"); + auto op_desc = make_shared("Add", "Add"); + auto node = graph->AddNode(op_desc); + AtomicAddrCleanOpTask atomic_task; + TbeOpTask tbe_task; + + ASSERT_EQ(tbe_task.EnableDynamicSupport(node, (void *)0x0001, 1), ACL_ERROR_GE_INTERNAL_ERROR); + ASSERT_EQ(atomic_task.EnableDynamicSupport(node, (void *)0x0001, 1), ACL_ERROR_GE_INTERNAL_ERROR); + + tbe_task.arg_size_ = sizeof(void *); + tbe_task.args_.reset(new (std::nothrow) uint8_t[tbe_task.arg_size_]); + atomic_task.arg_size_ = sizeof(void *); + atomic_task.args_.reset(new (std::nothrow) uint8_t[atomic_task.arg_size_]); + ASSERT_EQ(tbe_task.EnableDynamicSupport(node, (void *)0x0001, 1), SUCCESS); + ASSERT_EQ(atomic_task.EnableDynamicSupport(node, (void *)0x0001, 1), SUCCESS); + tbe_task.tiling_buffer_ = nullptr; + atomic_task.tiling_buffer_ = nullptr; +} From 21886e608e12983bbe3aecf74e053ed1707ce121 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Fri, 16 Jul 2021 12:00:31 +0800 Subject: [PATCH 3/5] Fix review advice. --- ge/single_op/task/op_task.cc | 26 +++++++++++++----------- tests/ut/ge/single_op/single_op_task_unittest.cc | 8 ++++++-- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index fd6639a5..ee752022 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -293,25 +293,26 @@ Status TbeOpTask::UpdateNodeByShape(const vector &input_desc, cons } Status TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size) { + node_ = node; + tiling_buffer_ = tiling_buffer; + max_tiling_size_ = max_tiling_size; if (tiling_buffer != nullptr) { - uintptr_t *arg_base = reinterpret_cast(args_.get()); - size_t arg_num = arg_size_ / sizeof(void *); + uintptr_t *arg_base = nullptr; + size_t arg_num = 0; + GetIoAddr(arg_base, arg_num); GE_CHECK_NOTNULL(node); GE_CHECK_NOTNULL(node->GetOpDesc()); uint32_t inputs_num = node->GetOpDesc()->GetInputsSize(); uint32_t outputs_num = node->GetOpDesc()->GetOutputsSize(); uint32_t workspace_nums = node->GetOpDesc()->GetWorkspace().size(); uint32_t tiling_index = inputs_num + outputs_num + workspace_nums; - if (arg_num == 0 || arg_num <= tiling_index) { + if (arg_num == 0 || arg_num < tiling_index) { GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Size]Tiling index %u, arg number %zu is invalid.", tiling_index, arg_num); return ACL_ERROR_GE_INTERNAL_ERROR; } arg_base[tiling_index] = reinterpret_cast(tiling_buffer); } - node_ = node; - tiling_buffer_ = tiling_buffer; - max_tiling_size_ = max_tiling_size; return SUCCESS; } @@ -481,20 +482,21 @@ void TbeOpTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { } Status AtomicAddrCleanOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size) { + node_ = node; + tiling_buffer_ = tiling_buffer; + max_tiling_size_ = max_tiling_size; if (tiling_buffer != nullptr) { - uintptr_t *arg_base = reinterpret_cast(args_.get()); - size_t arg_num = arg_size_ / sizeof(void *); + uintptr_t *arg_base = nullptr; + size_t arg_num = 0; + GetIoAddr(arg_base, arg_num); uint32_t tiling_index = atomic_output_indices_.size(); - if (arg_num == 0 || arg_num <= tiling_index) { + if (arg_num == 0 || arg_num < tiling_index) { GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Size]Tiling index %u, arg number %zu is invalid.", tiling_index, arg_num); return ACL_ERROR_GE_INTERNAL_ERROR; } arg_base[tiling_index] = reinterpret_cast(tiling_buffer); } - node_ = node; - tiling_buffer_ = tiling_buffer; - max_tiling_size_ = max_tiling_size; return SUCCESS; } diff --git a/tests/ut/ge/single_op/single_op_task_unittest.cc b/tests/ut/ge/single_op/single_op_task_unittest.cc index 5960fbbc..9a0381cd 100644 --- a/tests/ut/ge/single_op/single_op_task_unittest.cc +++ b/tests/ut/ge/single_op/single_op_task_unittest.cc @@ -245,12 +245,16 @@ TEST_F(UtestSingleOpTask, test_dynamic_support) { AtomicAddrCleanOpTask atomic_task; TbeOpTask tbe_task; + tbe_task.arg_size_ = sizeof(void *) * 1; + tbe_task.args_.reset(new (std::nothrow) uint8_t[tbe_task.arg_size_]); + atomic_task.arg_size_ = sizeof(void *) * 1; + atomic_task.args_.reset(new (std::nothrow) uint8_t[atomic_task.arg_size_]); ASSERT_EQ(tbe_task.EnableDynamicSupport(node, (void *)0x0001, 1), ACL_ERROR_GE_INTERNAL_ERROR); ASSERT_EQ(atomic_task.EnableDynamicSupport(node, (void *)0x0001, 1), ACL_ERROR_GE_INTERNAL_ERROR); - tbe_task.arg_size_ = sizeof(void *); + tbe_task.arg_size_ = sizeof(void *) * 2; tbe_task.args_.reset(new (std::nothrow) uint8_t[tbe_task.arg_size_]); - atomic_task.arg_size_ = sizeof(void *); + atomic_task.arg_size_ = sizeof(void *) * 2; atomic_task.args_.reset(new (std::nothrow) uint8_t[atomic_task.arg_size_]); ASSERT_EQ(tbe_task.EnableDynamicSupport(node, (void *)0x0001, 1), SUCCESS); ASSERT_EQ(atomic_task.EnableDynamicSupport(node, (void *)0x0001, 1), SUCCESS); From 3dc9881cd65bef96a7583b9db4be0c9e138ddee9 Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Sat, 17 Jul 2021 19:22:59 +0800 Subject: [PATCH 4/5] bugfix for taskdef's random variation in offline case --- tests/ut/ge/graph/build/task_generator_unittest.cc | 77 +++++++++++++++++++++- 1 file changed, 76 insertions(+), 1 deletion(-) diff --git a/tests/ut/ge/graph/build/task_generator_unittest.cc b/tests/ut/ge/graph/build/task_generator_unittest.cc index 1e865050..7be20fa1 100644 --- a/tests/ut/ge/graph/build/task_generator_unittest.cc +++ b/tests/ut/ge/graph/build/task_generator_unittest.cc @@ -29,6 +29,8 @@ #define protected public #define private public +#include "init/gelib.h" +#include "ge/opskernel_manager/ops_kernel_builder_manager.h" #include "graph/build/task_generator.h" #include "graph/manager/graph_mem_manager.h" #include "graph/manager/graph_var_manager.h" @@ -41,9 +43,46 @@ using namespace ge; namespace { const char *const kIsInputVar = "INPUT_IS_VAR"; const char *const kIsOutputVar = "OUTPUT_IS_VAR"; -} +const char *const kKernelInfoNameHccl = "ops_kernel_info_hccl"; +} // namespace class UtestTaskGeneratorTest : public testing::Test { public: + struct FakeOpsKernelBuilder : OpsKernelBuilder { + FakeOpsKernelBuilder(){}; + + private: + Status Initialize(const map &options) override { + return SUCCESS; + }; + Status Finalize() override { + return SUCCESS; + }; + Status CalcOpRunningParam(Node &node) override { + return SUCCESS; + }; + Status GenerateTask(const Node &node, RunContext &context, std::vector &tasks) override { + domi::TaskDef task_def; + tasks.push_back(task_def); + return SUCCESS; + }; + }; + + struct FakeOpsKernelInfoStore : OpsKernelInfoStore { + FakeOpsKernelInfoStore() = default; + + private: + Status Initialize(const std::map &options) override { + return SUCCESS; + }; + Status Finalize() override { + return SUCCESS; + }; + bool CheckSupported(const OpDescPtr &op_desc, std::string &reason) const override { + return true; + }; + void GetAllOpsKernelInfo(std::map &infos) const override{}; + }; + ge::ComputeGraphPtr BuildGraphFpProfiling() { ge::ut::GraphBuilder builder("graph"); auto data = builder.AddNode("data", "phony", 1, 1); @@ -95,6 +134,14 @@ class UtestTaskGeneratorTest : public testing::Test { return builder.GetGraph(); } + ge::ComputeGraphPtr BuildHcclGraph() { + ge::ut::GraphBuilder builder("graph"); + auto hccl_node = builder.AddNode("hccl_phony_node", "HCCL_PHONY", 0, 0); + auto op_desc = hccl_node->GetOpDesc(); + op_desc->SetOpKernelLibName(kKernelInfoNameHccl); + op_desc->SetStreamId(0); + return builder.GetGraph(); + } protected: void SetUp() {} @@ -156,3 +203,31 @@ TEST_F(UtestTaskGeneratorTest, AutoFindBpOpIndex) { output_desc->SetName("hcom"); EXPECT_EQ(task_generator.AutoFindBpOpIndex(graph, profiling_point, all_reduce_nodes), SUCCESS); } + +TEST_F(UtestTaskGeneratorTest, GenerateTask) { + map options; + Status ret = ge::GELib::Initialize(options); + EXPECT_EQ(ret, SUCCESS); + + shared_ptr instance_ptr = ge::GELib::GetInstance(); + EXPECT_NE(instance_ptr, nullptr); + + OpsKernelInfoStorePtr ops_kernel_info_store_ptr = MakeShared(); + instance_ptr->opsManager_.ops_kernel_store_.insert(make_pair(kKernelInfoNameHccl, ops_kernel_info_store_ptr)); + + OpsKernelBuilderManager &builder_manager_instance_ptr = ge::OpsKernelBuilderManager::Instance(); + OpsKernelBuilderPtr fake_builder = MakeShared(); + builder_manager_instance_ptr.ops_kernel_builders_[kKernelInfoNameHccl] = fake_builder; + + auto graph = BuildHcclGraph(); + TaskGenerator task_generator(nullptr, 0); + RunContext run_context; + run_context.graphStreamList.push_back(static_cast(ops_kernel_info_store_ptr.get())); + vector all_reduce_nodes; + vector task_def_list; + map op_name_map; + + EXPECT_EQ(task_generator.GenerateTask(run_context, graph, task_def_list, op_name_map), SUCCESS); + EXPECT_EQ(task_def_list.size(), 1); + EXPECT_EQ(task_def_list[0].ops_kernel_store_ptr(), reinterpret_cast(ops_kernel_info_store_ptr.get())); +} \ No newline at end of file From d715f462b1723dc56cd61d31a565da2c8abc8d35 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Mon, 19 Jul 2021 09:06:53 +0800 Subject: [PATCH 5/5] Delete unused from SubGraphInfo --- ge/graph/manager/graph_manager_utils.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/ge/graph/manager/graph_manager_utils.h b/ge/graph/manager/graph_manager_utils.h index efdbecf8..e17d9046 100644 --- a/ge/graph/manager/graph_manager_utils.h +++ b/ge/graph/manager/graph_manager_utils.h @@ -105,10 +105,7 @@ class SubGraphInfo { std::vector output_flag_; ModelIdInfo model_id_info_; GeModelPtr ge_model_ptr_; - bool malloc_flag_; - std::vector buffer_addr_; std::string output_names_; - std::vector buffer_size_; std::string stream_label_; std::unordered_map end_to_pld_; std::unordered_map pld_to_end_;