@@ -40,6 +40,12 @@ class TensorBuffer { | |||||
TensorBuffer &operator = (const TensorBuffer &) = delete; | TensorBuffer &operator = (const TensorBuffer &) = delete; | ||||
~TensorBuffer(); | ~TensorBuffer(); | ||||
void* Release() { | |||||
auto ret = buffer_; | |||||
buffer_ = nullptr; | |||||
return ret; | |||||
} | |||||
void *GetData() { | void *GetData() { | ||||
return buffer_; | return buffer_; | ||||
} | } | ||||
@@ -48,6 +54,10 @@ class TensorBuffer { | |||||
return size_; | return size_; | ||||
} | } | ||||
MemStorageType GetMemType() const { | |||||
return mem_type_; | |||||
} | |||||
private: | private: | ||||
TensorBuffer(NpuMemoryAllocator *allocator, void *buffer, size_t size, MemStorageType mem_type = HBM); | TensorBuffer(NpuMemoryAllocator *allocator, void *buffer, size_t size, MemStorageType mem_type = HBM); | ||||
@@ -69,6 +79,10 @@ class TensorValue { | |||||
void Destroy(); | void Destroy(); | ||||
void *Release() { | |||||
return buffer_->Release(); | |||||
} | |||||
bool IsEmpty() { | bool IsEmpty() { | ||||
return ref_buffer_ == nullptr && buffer_ == nullptr; | return ref_buffer_ == nullptr && buffer_ == nullptr; | ||||
} | } | ||||
@@ -80,6 +94,10 @@ class TensorValue { | |||||
void SetName(const std::string &name) { | void SetName(const std::string &name) { | ||||
name_ = name; | name_ = name; | ||||
} | } | ||||
MemStorageType GetMemType() const { | |||||
return buffer_->GetMemType(); | |||||
} | |||||
void *MutableData(); | void *MutableData(); | ||||
@@ -19,6 +19,13 @@ | |||||
#include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
#include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
#include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
#include "graph/types.h" | |||||
#include "graph/debug/ge_attr_define.h" | |||||
#include "graph/manager/graph_caching_allocator.h" | |||||
#include "graph/manager/graph_mem_allocator.h" | |||||
#include "graph/manager/rdma_pool_allocator.h" | |||||
#include "graph/manager/host_mem_allocator.h" | |||||
#include "graph/manager/graph_mem_manager.h" | |||||
namespace ge { | namespace ge { | ||||
namespace hybrid { | namespace hybrid { | ||||
@@ -440,22 +447,31 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a | |||||
GeShape ge_shape(tensor_desc->GetShape().GetDims()); | GeShape ge_shape(tensor_desc->GetShape().GetDims()); | ||||
GeTensorDesc ge_tensor_desc; | GeTensorDesc ge_tensor_desc; | ||||
ge_tensor_desc.SetShape(ge_shape); | ge_tensor_desc.SetShape(ge_shape); | ||||
GeTensor ge_tensor(ge_tensor_desc); | |||||
if (output_size > 0) { | if (output_size > 0) { | ||||
auto aligned_ptr = MakeShared<AlignedPtr>(output_size, kAlignment); | |||||
GE_CHECK_NOTNULL(aligned_ptr); | |||||
auto data_buf = aligned_ptr->MutableGet(); | |||||
GE_CHECK_NOTNULL(data_buf); | |||||
GE_CHK_RT_RET(rtMemcpy(data_buf, output_size, output_tensor.GetData(), output_size, RT_MEMCPY_DEVICE_TO_HOST)); | |||||
ge_tensor.SetData(aligned_ptr, output_size); | |||||
output_data->blobs.emplace_back(data_buf, static_cast<uint32_t>(output_size), false); | |||||
if (execute_mode != kLazyRecompile) { | |||||
auto aligned_ptr = MakeShared<AlignedPtr>(output_size, kAlignment); | |||||
GE_CHECK_NOTNULL(aligned_ptr); | |||||
auto data_buf = aligned_ptr->MutableGet(); | |||||
GE_CHECK_NOTNULL(data_buf); | |||||
GE_CHK_RT_RET(rtMemcpy(data_buf, output_size, output_tensor.GetData(), output_size, RT_MEMCPY_DEVICE_TO_HOST)); | |||||
GeTensor ge_tensor(ge_tensor_desc); | |||||
ge_tensor.SetData(aligned_ptr, output_size); | |||||
output_data->blobs.emplace_back(data_buf, static_cast<uint32_t>(output_size), false); | |||||
auto tensor = TensorAdapter::AsTensor(ge_tensor); | |||||
outputs.emplace_back(std::move(tensor)); | |||||
} else { | |||||
BuildDeviceTensor(output_tensor, ge_tensor_desc, output_size, outputs); | |||||
output_data->blobs.emplace_back(output_tensor.Release(), static_cast<uint32_t>(output_size), false, | |||||
static_cast<uint32_t>(kPlacementDevice)); | |||||
} | |||||
} else { | } else { | ||||
GELOGW("Output[%zu] is empty. shape = [%s]", i, tensor_desc->GetShape().ToString().c_str()); | |||||
GELOGW("Output [%zu] is empty. shape = [%s]", i, tensor_desc->GetShape().ToString().c_str()); | |||||
GeTensor ge_tensor(ge_tensor_desc); | |||||
ge_tensor.SetData(nullptr, 0U); | ge_tensor.SetData(nullptr, 0U); | ||||
output_data->blobs.emplace_back(nullptr, 0U, false); | output_data->blobs.emplace_back(nullptr, 0U, false); | ||||
auto tensor = TensorAdapter::AsTensor(ge_tensor); | |||||
outputs.emplace_back(std::move(tensor)); | |||||
} | } | ||||
auto tensor = TensorAdapter::AsTensor(ge_tensor); | |||||
outputs.emplace_back(std::move(tensor)); | |||||
GELOGD("Output[%zu] added, type = %s, shape = [%s], size = %ld", i, | GELOGD("Output[%zu] added, type = %s, shape = [%s], size = %ld", i, | ||||
TypeUtils::DataTypeToSerialString(tensor_desc->GetDataType()).c_str(), | TypeUtils::DataTypeToSerialString(tensor_desc->GetDataType()).c_str(), | ||||
tensor_desc->GetShape().ToString().c_str(), output_size); | tensor_desc->GetShape().ToString().c_str(), output_size); | ||||
@@ -464,6 +480,29 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
void HybridModelAsyncExecutor::BuildDeviceTensor(TensorValue &output_tensor, GeTensorDesc &ge_tensor_desc, | |||||
int64_t output_size, std::vector<ge::Tensor> &outputs) { | |||||
GELOGD("Start to build device tensor"); | |||||
auto mem_type = output_tensor.GetMemType(); | |||||
GELOGD("Mem type is %d", static_cast<uint32_t>(mem_type)); | |||||
auto deleter = [=](uint8_t *device_data) { | |||||
if (device_data != nullptr) { | |||||
if (mem_type == RDMA_HBM) { | |||||
MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(device_data, device_id_); | |||||
} else if (mem_type == HOST_DDR) { | |||||
MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Free(device_data); | |||||
} else { | |||||
MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(device_data, device_id_); | |||||
} | |||||
} | |||||
}; | |||||
ge_tensor_desc.SetPlacement(kPlacementDevice); | |||||
GeTensor ge_tensor(ge_tensor_desc); | |||||
auto tensor = TensorAdapter::AsTensor(ge_tensor); | |||||
tensor.SetData(reinterpret_cast<uint8_t *>(output_tensor.Release()), static_cast<size_t>(output_size), deleter); | |||||
outputs.emplace_back(std::move(tensor)); | |||||
} | |||||
Status HybridModelAsyncExecutor::Execute(const std::vector<DataBuffer> &inputs, | Status HybridModelAsyncExecutor::Execute(const std::vector<DataBuffer> &inputs, | ||||
const std::vector<GeTensorDesc> &input_desc, | const std::vector<GeTensorDesc> &input_desc, | ||||
std::vector<DataBuffer> &outputs, | std::vector<DataBuffer> &outputs, | ||||
@@ -75,9 +75,9 @@ class HybridModelAsyncExecutor { | |||||
HybridModelExecutor::ExecuteArgs &args, | HybridModelExecutor::ExecuteArgs &args, | ||||
OutputData *output_data); | OutputData *output_data); | ||||
Status CopyOutputs(HybridModelExecutor::ExecuteArgs &args, | |||||
OutputData *output_data, | |||||
std::vector<ge::Tensor> &outputs); | |||||
Status CopyOutputs(HybridModelExecutor::ExecuteArgs &args, OutputData *output_data, std::vector<ge::Tensor> &outputs); | |||||
void BuildDeviceTensor(TensorValue &output_tensor, GeTensorDesc &ge_tensor_desc, int64_t output_size, | |||||
std::vector<ge::Tensor> &outputs); | |||||
Status OnComputeDone(uint32_t data_index, uint32_t result_code, std::vector<ge::Tensor> &outputs); | Status OnComputeDone(uint32_t data_index, uint32_t result_code, std::vector<ge::Tensor> &outputs); | ||||
@@ -61,6 +61,9 @@ const std::string kTaskTypeAicore = "AI_CORE"; | |||||
const std::string kTaskTypeAicpu = "AI_CPU"; | const std::string kTaskTypeAicpu = "AI_CPU"; | ||||
const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID"; | const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID"; | ||||
// dynamic execute mode | |||||
const char *const kLazyRecompile = "lazy_recompile"; | |||||
// Data cache, including data address and length | // Data cache, including data address and length | ||||
struct DataBuffer { | struct DataBuffer { | ||||
public: | public: | ||||
@@ -1 +1 @@ | |||||
Subproject commit 8dd3448e2f0150c51266bc120bdd5d171a003e6b | |||||
Subproject commit 1aa10c59b4e11564c2db76c2ba0039474d38df26 |
@@ -828,6 +828,7 @@ set(HYBRID_TEST_FILES | |||||
"hybrid/executor/worker/execution_engine_unittest.cc" | "hybrid/executor/worker/execution_engine_unittest.cc" | ||||
"hybrid/model/hybrid_model_builder_unittest.cc" | "hybrid/model/hybrid_model_builder_unittest.cc" | ||||
"hybrid/node_executor/rts/rts_node_task_unittest.cc" | "hybrid/node_executor/rts/rts_node_task_unittest.cc" | ||||
"hybrid/executor/hybrid_model_async_executor_unittest.cc" | |||||
) | ) | ||||
set(OTHERS_TEST_FILES | set(OTHERS_TEST_FILES | ||||
@@ -0,0 +1,89 @@ | |||||
/** | |||||
* Copyright 2019-2021 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include <gtest/gtest.h> | |||||
#include <gmock/gmock.h> | |||||
#include <vector> | |||||
#define private public | |||||
#define protected public | |||||
#include "hybrid/executor/hybrid_model_async_executor.h" | |||||
#include "hybrid/executor/hybrid_model_executor.h" | |||||
#include "graph/utils/tensor_utils.h" | |||||
#include "graph/utils/graph_utils.h" | |||||
#include "graph/debug/ge_attr_define.h" | |||||
using namespace std; | |||||
using namespace testing; | |||||
namespace ge { | |||||
using namespace hybrid; | |||||
class UtestHybridModelAsyncExecutor : public testing::Test { | |||||
protected: | |||||
void SetUp() {} | |||||
void TearDown() { } | |||||
}; | |||||
TEST_F(UtestHybridModelAsyncExecutor, CopyOutputs_success) { | |||||
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test"); | |||||
GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph); | |||||
ge_root_model->SetModelName("test_name"); | |||||
GeModelPtr ge_sub_model = make_shared<GeModel>(); | |||||
HybridModel hybrid_model(ge_root_model); | |||||
HybridModelAsyncExecutor executor(&hybrid_model); | |||||
TensorValue input_tensor; | |||||
HybridModelExecutor::ExecuteArgs args; | |||||
args.inputs.emplace_back(input_tensor); | |||||
auto desc = MakeShared<GeTensorDesc>(); | |||||
GeShape geshape({2,2,2,2}); | |||||
desc->SetShape(geshape); | |||||
auto allocator = NpuMemoryAllocator::GetAllocator(); | |||||
auto tensor_buffer = TensorBuffer::Create(allocator, 100); | |||||
auto output_tensor = TensorValue(shared_ptr<TensorBuffer>(tensor_buffer.release())); | |||||
args.outputs.emplace_back(output_tensor); | |||||
args.output_desc.emplace_back(desc); | |||||
OutputData output_data; | |||||
std::vector<ge::Tensor> outputs; | |||||
auto ret = executor.CopyOutputs(args, &output_data, outputs); | |||||
ASSERT_EQ(ret,SUCCESS); | |||||
} | |||||
TEST_F(UtestHybridModelAsyncExecutor, BuildDeviceTensor) { | |||||
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test"); | |||||
GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph); | |||||
ge_root_model->SetModelName("test_name"); | |||||
GeModelPtr ge_sub_model = make_shared<GeModel>(); | |||||
HybridModel hybrid_model(ge_root_model); | |||||
HybridModelAsyncExecutor executor(&hybrid_model); | |||||
auto allocator = NpuMemoryAllocator::GetAllocator(); | |||||
auto tensor_buffer = TensorBuffer::Create(allocator, 100); | |||||
auto tensor = TensorValue(shared_ptr<TensorBuffer>(tensor_buffer.release())); | |||||
GeTensorDesc ge_tensor_desc; | |||||
int64_t output_size = 100; | |||||
std::vector<ge::Tensor> outputs; | |||||
executor.BuildDeviceTensor(tensor, ge_tensor_desc, output_size, outputs); | |||||
auto size = tensor.GetSize(); | |||||
ASSERT_EQ(size, 100); | |||||
} | |||||
} // namespace ge |