From 926ac9238c2ced629aa974585b970d0a13ab837e Mon Sep 17 00:00:00 2001
From: zhou_chao1993
Date: Fri, 14 May 2021 17:24:23 +0800
Subject: [PATCH] dynamic getnext

---
 ge/hybrid/common/tensor_value.h                    | 18 +++++
 ge/hybrid/executor/hybrid_model_async_executor.cc  | 61 ++++++++++++---
 ge/hybrid/executor/hybrid_model_async_executor.h   |  6 +-
 inc/framework/common/ge_types.h                    |  3 +
 metadef                                            |  2 +-
 tests/ut/ge/CMakeLists.txt                         |  1 +
 .../hybrid_model_async_executor_unittest.cc        | 89 ++++++++++++++++++++++
 7 files changed, 165 insertions(+), 15 deletions(-)
 create mode 100644 tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc

diff --git a/ge/hybrid/common/tensor_value.h b/ge/hybrid/common/tensor_value.h
index 19e1ba27..348e4e6d 100644
--- a/ge/hybrid/common/tensor_value.h
+++ b/ge/hybrid/common/tensor_value.h
@@ -40,6 +40,12 @@ class TensorBuffer {
   TensorBuffer &operator = (const TensorBuffer &) = delete;
   ~TensorBuffer();
 
+  void *Release() {
+    auto ret = buffer_;
+    buffer_ = nullptr;
+    return ret;
+  }
+
   void *GetData() {
     return buffer_;
   }
@@ -48,6 +54,10 @@ class TensorBuffer {
     return size_;
   }
 
+  MemStorageType GetMemType() const {
+    return mem_type_;
+  }
+
  private:
  TensorBuffer(NpuMemoryAllocator *allocator, void *buffer, size_t size, MemStorageType mem_type = HBM);
 
@@ -69,6 +79,10 @@ class TensorValue {
 
   void Destroy();
 
+  void *Release() {
+    return buffer_->Release();
+  }
+
   bool IsEmpty() {
     return ref_buffer_ == nullptr && buffer_ == nullptr;
   }
@@ -80,6 +94,10 @@ class TensorValue {
   void SetName(const std::string &name) {
     name_ = name;
   }
+
+  MemStorageType GetMemType() const {
+    return buffer_->GetMemType();
+  }
 
   void *MutableData();
 
diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc
index af06e27b..930412e3 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_async_executor.cc
@@ -19,6 +19,13 @@
 #include "graph/utils/tensor_utils.h"
 #include "graph/utils/type_utils.h"
 #include "graph/ge_context.h"
+#include "graph/types.h"
+#include "graph/debug/ge_attr_define.h"
+#include "graph/manager/graph_caching_allocator.h"
+#include "graph/manager/graph_mem_allocator.h"
+#include "graph/manager/rdma_pool_allocator.h"
+#include "graph/manager/host_mem_allocator.h"
+#include "graph/manager/graph_mem_manager.h"
 
 namespace ge {
 namespace hybrid {
@@ -440,22 +447,31 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
     GeShape ge_shape(tensor_desc->GetShape().GetDims());
     GeTensorDesc ge_tensor_desc;
     ge_tensor_desc.SetShape(ge_shape);
-    GeTensor ge_tensor(ge_tensor_desc);
     if (output_size > 0) {
-      auto aligned_ptr = MakeShared<AlignedPtr>(output_size, kAlignment);
-      GE_CHECK_NOTNULL(aligned_ptr);
-      auto data_buf = aligned_ptr->MutableGet();
-      GE_CHECK_NOTNULL(data_buf);
-      GE_CHK_RT_RET(rtMemcpy(data_buf, output_size, output_tensor.GetData(), output_size, RT_MEMCPY_DEVICE_TO_HOST));
-      ge_tensor.SetData(aligned_ptr, output_size);
-      output_data->blobs.emplace_back(data_buf, static_cast<uint64_t>(output_size), false);
+      if (execute_mode != kLazyRecompile) {
+        auto aligned_ptr = MakeShared<AlignedPtr>(output_size, kAlignment);
+        GE_CHECK_NOTNULL(aligned_ptr);
+        auto data_buf = aligned_ptr->MutableGet();
+        GE_CHECK_NOTNULL(data_buf);
+        GE_CHK_RT_RET(rtMemcpy(data_buf, output_size, output_tensor.GetData(), output_size, RT_MEMCPY_DEVICE_TO_HOST));
+        GeTensor ge_tensor(ge_tensor_desc);
+        ge_tensor.SetData(aligned_ptr, output_size);
+        output_data->blobs.emplace_back(data_buf, static_cast<uint64_t>(output_size), false);
+        auto tensor = TensorAdapter::AsTensor(ge_tensor);
+        outputs.emplace_back(std::move(tensor));
+      } else {
+        BuildDeviceTensor(output_tensor, ge_tensor_desc, output_size, outputs);
+        output_data->blobs.emplace_back(output_tensor.Release(), static_cast<uint64_t>(output_size), false,
+                                        static_cast<uint32_t>(kPlacementDevice));
+      }
     } else {
-      GELOGW("Output[%zu] is empty. shape = [%s]", i, tensor_desc->GetShape().ToString().c_str());
+      GELOGW("Output [%zu] is empty. shape = [%s]", i, tensor_desc->GetShape().ToString().c_str());
+      GeTensor ge_tensor(ge_tensor_desc);
       ge_tensor.SetData(nullptr, 0U);
       output_data->blobs.emplace_back(nullptr, 0U, false);
+      auto tensor = TensorAdapter::AsTensor(ge_tensor);
+      outputs.emplace_back(std::move(tensor));
     }
-    auto tensor = TensorAdapter::AsTensor(ge_tensor);
-    outputs.emplace_back(std::move(tensor));
     GELOGD("Output[%zu] added, type = %s, shape = [%s], size = %ld", i,
            TypeUtils::DataTypeToSerialString(tensor_desc->GetDataType()).c_str(),
            tensor_desc->GetShape().ToString().c_str(), output_size);
@@ -464,6 +480,29 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
   return SUCCESS;
 }
 
+void HybridModelAsyncExecutor::BuildDeviceTensor(TensorValue &output_tensor, GeTensorDesc &ge_tensor_desc,
+                                                 int64_t output_size, std::vector<ge::Tensor> &outputs) {
+  GELOGD("Start to build device tensor");
+  auto mem_type = output_tensor.GetMemType();
+  GELOGD("Mem type is %d", static_cast<int32_t>(mem_type));
+  auto deleter = [=](uint8_t *device_data) {
+    if (device_data != nullptr) {
+      if (mem_type == RDMA_HBM) {
+        MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(device_data, device_id_);
+      } else if (mem_type == HOST_DDR) {
+        MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Free(device_data);
+      } else {
+        MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(device_data, device_id_);
+      }
+    }
+  };
+  ge_tensor_desc.SetPlacement(kPlacementDevice);
+  GeTensor ge_tensor(ge_tensor_desc);
+  auto tensor = TensorAdapter::AsTensor(ge_tensor);
+  tensor.SetData(reinterpret_cast<uint8_t *>(output_tensor.Release()), static_cast<size_t>(output_size), deleter);
+  outputs.emplace_back(std::move(tensor));
+}
+
 Status HybridModelAsyncExecutor::Execute(const std::vector<DataBuffer> &inputs,
                                          const std::vector<GeTensorDesc> &input_desc,
                                          std::vector<DataBuffer> &outputs,
diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h
index c6d99c7c..5ae1a222 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.h
+++ b/ge/hybrid/executor/hybrid_model_async_executor.h
@@ -75,9 +75,9 @@ class HybridModelAsyncExecutor {
                       HybridModelExecutor::ExecuteArgs &args,
                       OutputData *output_data);
 
-  Status CopyOutputs(HybridModelExecutor::ExecuteArgs &args,
-                     OutputData *output_data,
-                     std::vector<ge::Tensor> &outputs);
+  Status CopyOutputs(HybridModelExecutor::ExecuteArgs &args, OutputData *output_data, std::vector<ge::Tensor> &outputs);
+  void BuildDeviceTensor(TensorValue &output_tensor, GeTensorDesc &ge_tensor_desc, int64_t output_size,
+                         std::vector<ge::Tensor> &outputs);
 
   Status OnComputeDone(uint32_t data_index, uint32_t result_code, std::vector<ge::Tensor> &outputs);
 
diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h
index 01fc7468..64231b8c 100644
--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -61,6 +61,9 @@ const std::string kTaskTypeAicore = "AI_CORE";
 const std::string kTaskTypeAicpu = "AI_CPU";
 const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID";
 
+// dynamic execute mode
+const char *const kLazyRecompile = "lazy_recompile";
+
 // Data cache, including data address and length
 struct DataBuffer {
  public:
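
Note: with the lazy_recompile branch above, a blob returned through OutputData can now point at
device memory instead of a host copy; the extra DataBuffer constructor argument
(kPlacementDevice) records where the pointer lives. The stand-alone C++ sketch below shows how a
consumer might branch on that placement field. It is not GE code: the Blob struct merely mirrors
DataBuffer's (data, length, placement) fields, and the constant values and the CopyFromDevice
helper are assumptions for illustration.

// Stand-alone sketch (not GE code): branching on a blob's placement field.
#include <cstdint>
#include <cstring>
#include <vector>

constexpr uint32_t kPlacementHostSketch = 0;    // hypothetical value
constexpr uint32_t kPlacementDeviceSketch = 1;  // hypothetical value

struct Blob {  // mirrors ge::DataBuffer's (data, length, placement) fields
  void *data = nullptr;
  uint64_t length = 0;
  uint32_t placement = kPlacementHostSketch;
};

// Hypothetical stand-in for a device-to-host copy (rtMemcpy in GE).
void CopyFromDevice(void *dst, const void *src, uint64_t len) {
  std::memcpy(dst, src, len);  // placeholder: real code would call the device runtime
}

std::vector<uint8_t> ReadBlob(const Blob &blob) {
  std::vector<uint8_t> host(blob.length);
  if (blob.placement == kPlacementDeviceSketch) {
    CopyFromDevice(host.data(), blob.data, blob.length);  // pointer is device memory
  } else {
    std::memcpy(host.data(), blob.data, blob.length);     // pointer is host memory
  }
  return host;
}
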
diff --git a/metadef b/metadef
index 8dd3448e..1aa10c59 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit 8dd3448e2f0150c51266bc120bdd5d171a003e6b
+Subproject commit 1aa10c59b4e11564c2db76c2ba0039474d38df26
diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt
index 38e39a06..2b68d8fe 100755
--- a/tests/ut/ge/CMakeLists.txt
+++ b/tests/ut/ge/CMakeLists.txt
@@ -828,6 +828,7 @@ set(HYBRID_TEST_FILES
     "hybrid/executor/worker/execution_engine_unittest.cc"
     "hybrid/model/hybrid_model_builder_unittest.cc"
     "hybrid/node_executor/rts/rts_node_task_unittest.cc"
+    "hybrid/executor/hybrid_model_async_executor_unittest.cc"
 )
 
 set(OTHERS_TEST_FILES
diff --git a/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc b/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc
new file mode 100644
index 00000000..b4091a50
--- /dev/null
+++ b/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc
@@ -0,0 +1,89 @@
+/**
+ * Copyright 2019-2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+#include <vector>
+
+#define private public
+#define protected public
+#include "hybrid/executor/hybrid_model_async_executor.h"
+#include "hybrid/executor/hybrid_model_executor.h"
+#include "graph/utils/tensor_utils.h"
+#include "graph/utils/graph_utils.h"
+#include "graph/debug/ge_attr_define.h"
+
+
+using namespace std;
+using namespace testing;
+
+
+namespace ge {
+using namespace hybrid;
+
+class UtestHybridModelAsyncExecutor : public testing::Test {
+ protected:
+  void SetUp() {}
+
+  void TearDown() {}
+};
+
+TEST_F(UtestHybridModelAsyncExecutor, CopyOutputs_success) {
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
+  GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph);
+  ge_root_model->SetModelName("test_name");
+  GeModelPtr ge_sub_model = make_shared<GeModel>();
+  HybridModel hybrid_model(ge_root_model);
+  HybridModelAsyncExecutor executor(&hybrid_model);
+
+  TensorValue input_tensor;
+  HybridModelExecutor::ExecuteArgs args;
+  args.inputs.emplace_back(input_tensor);
+  auto desc = MakeShared<GeTensorDesc>();
+  GeShape geshape({2, 2, 2, 2});
+  desc->SetShape(geshape);
+
+  auto allocator = NpuMemoryAllocator::GetAllocator();
+  auto tensor_buffer = TensorBuffer::Create(allocator, 100);
+  auto output_tensor = TensorValue(shared_ptr<TensorBuffer>(tensor_buffer.release()));
+  args.outputs.emplace_back(output_tensor);
+  args.output_desc.emplace_back(desc);
+
+  OutputData output_data;
+  std::vector<ge::Tensor> outputs;
+  auto ret = executor.CopyOutputs(args, &output_data, outputs);
+  ASSERT_EQ(ret, SUCCESS);
+}
+
+TEST_F(UtestHybridModelAsyncExecutor, BuildDeviceTensor) {
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
+  GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph);
+  ge_root_model->SetModelName("test_name");
+  GeModelPtr ge_sub_model = make_shared<GeModel>();
+  HybridModel hybrid_model(ge_root_model);
+  HybridModelAsyncExecutor executor(&hybrid_model);
+
+  auto allocator = NpuMemoryAllocator::GetAllocator();
+  auto tensor_buffer = TensorBuffer::Create(allocator, 100);
+  auto tensor = TensorValue(shared_ptr<TensorBuffer>(tensor_buffer.release()));
+  GeTensorDesc ge_tensor_desc;
+  int64_t output_size = 100;
+  std::vector<ge::Tensor> outputs;
+  executor.BuildDeviceTensor(tensor, ge_tensor_desc, output_size, outputs);
+  auto size = tensor.GetSize();
+  ASSERT_EQ(size, 100);
+}
+} // namespace ge
\ No newline at end of file
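
Note: in the lazy_recompile path, CopyOutputs skips the device-to-host rtMemcpy; BuildDeviceTensor
instead detaches the device buffer from its TensorValue via Release() and hands it to the output
ge::Tensor together with a deleter that frees through the allocator pool matching the buffer's
MemStorageType. The stand-alone sketch below illustrates that Release()-plus-deleter ownership
transfer. It is not GE code: Buffer, OutputTensor, and MemType are hypothetical stand-ins for
TensorBuffer, ge::Tensor, and MemStorageType, and std::malloc/std::free stand in for the pools.

// Stand-alone sketch (not GE code): ownership transfer via Release() plus a
// memory-type-aware deleter, mirroring the pattern used by BuildDeviceTensor.
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <memory>

enum class MemType { HBM, RDMA_HBM, HOST_DDR };  // stand-in for MemStorageType

// Stand-in for TensorBuffer: owns a raw buffer until Release() is called.
class Buffer {
 public:
  Buffer(void *data, MemType type) : data_(data), type_(type) {}
  void *Release() {  // caller takes ownership; this object no longer frees it
    void *ret = data_;
    data_ = nullptr;
    return ret;
  }
  MemType GetMemType() const { return type_; }

 private:
  void *data_;
  MemType type_;
};

// Stand-in for ge::Tensor::SetData(uint8_t *, size_t, deleter): the tensor
// keeps the pointer alive and invokes the deleter when it is destroyed.
class OutputTensor {
 public:
  void SetData(uint8_t *data, size_t size, std::function<void(uint8_t *)> deleter) {
    holder_ = std::shared_ptr<uint8_t>(data, std::move(deleter));
    size_ = size;
  }

 private:
  std::shared_ptr<uint8_t> holder_;
  size_t size_ = 0;
};

int main() {
  Buffer buf(std::malloc(100), MemType::HBM);
  const MemType mem_type = buf.GetMemType();  // captured by value, like [=] in the patch
  auto deleter = [mem_type](uint8_t *p) {
    if (p == nullptr) {
      return;
    }
    // In GE this would dispatch to the RDMA / host / caching allocator pool.
    std::cout << "freeing via pool " << static_cast<int>(mem_type) << "\n";
    std::free(p);
  };
  OutputTensor out;
  out.SetData(reinterpret_cast<uint8_t *>(buf.Release()), 100, deleter);
  // When `out` goes out of scope, the deleter returns the memory to its pool.
  return 0;
}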