@@ -125,7 +125,7 @@ set(TRAIN_SRC_LIST | |||
"graph/manager/graph_var_manager.cc" | |||
"graph/manager/host_mem_manager.cc" | |||
"graph/manager/rdma_pool_allocator.cc" | |||
$<$<STREQUAL:${ENABLE_OPEN_SRC},FALSE>:graph/manager/host_mem_allocator.cc> | |||
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/manager/host_mem_allocator.cc> | |||
"graph/manager/memory_api.cc" | |||
"graph/manager/model_manager/event_manager.cc" | |||
"graph/manager/trans_var_data_utils.cc" | |||
@@ -167,7 +167,7 @@ set(TRAIN_SRC_LIST | |||
"graph/passes/hccl_group_pass.cc" | |||
"graph/passes/enter_pass.cc" | |||
"graph/passes/assign_pass.cc" | |||
$<$<STREQUAL:${ENABLE_OPEN_SRC},FALSE>:graph/passes/inplace_support_check_pass.cc> | |||
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/passes/inplace_support_check_pass.cc> | |||
"graph/passes/flow_ctrl_pass.cc" | |||
"graph/passes/global_step_insert_pass.cc" | |||
"host_kernels/transpose_kernel.cc" | |||
@@ -403,7 +403,7 @@ set(INFER_SRC_LIST | |||
"graph/manager/graph_var_manager.cc" | |||
"graph/manager/host_mem_manager.cc" | |||
"graph/manager/rdma_pool_allocator.cc" | |||
$<$<STREQUAL:${ENABLE_OPEN_SRC},FALSE>:graph/manager/host_mem_allocator.cc> | |||
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/manager/host_mem_allocator.cc> | |||
"graph/manager/graph_mem_allocator.cc" | |||
"graph/manager/graph_caching_allocator.cc" | |||
"model/ge_model.cc" | |||
@@ -525,7 +525,7 @@ set(INFER_SRC_LIST | |||
"graph/passes/for_pass.cc" | |||
"graph/passes/enter_pass.cc" | |||
"graph/passes/assign_pass.cc" | |||
$<$<STREQUAL:${ENABLE_OPEN_SRC},FALSE>:graph/passes/inplace_support_check_pass.cc> | |||
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/passes/inplace_support_check_pass.cc> | |||
"graph/passes/addn_pass.cc" | |||
"graph/passes/common_subexpression_elimination_pass.cc" | |||
"graph/passes/remove_same_const_pass.cc" | |||
@@ -624,6 +624,7 @@ target_compile_definitions(ge_runner PRIVATE | |||
FMK_SUPPORT_DUMP | |||
DAVINCI_CLOUD | |||
google=ascend_private | |||
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||
) | |||
target_compile_options(ge_runner PRIVATE | |||
@@ -691,6 +692,7 @@ target_compile_definitions(ge_compiler PRIVATE | |||
FMK_HOST_INFER | |||
COMPILE_OMG_PACKAGE | |||
google=ascend_private | |||
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||
) | |||
target_compile_options(ge_compiler PRIVATE | |||
@@ -28,7 +28,7 @@ set(SRC_LIST | |||
"../graph/manager/trans_var_data_utils.cc" | |||
"../graph/manager/util/debug.cc" | |||
"../graph/manager/rdma_pool_allocator.cc" | |||
$<$<STREQUAL:${ENABLE_OPEN_SRC},FALSE>:../graph/manager/host_mem_allocator.cc> | |||
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:../graph/manager/host_mem_allocator.cc> | |||
"../hybrid/node_executor/aicpu/aicpu_ext_info.cc" | |||
"../model/ge_model.cc" | |||
"../model/ge_root_model.cc" | |||
@@ -175,6 +175,7 @@ target_compile_definitions(ge_executor PRIVATE | |||
$<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> | |||
$<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> | |||
LOG_CPP | |||
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||
) | |||
target_include_directories(ge_executor PRIVATE | |||
@@ -217,6 +218,7 @@ target_compile_definitions(ge_executor_shared PRIVATE | |||
PROTOBUF_INLINE_NOT_IN_HEADERS=0 | |||
DAVINCI_SUPPORT_PROFILING | |||
google=ascend_private | |||
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||
) | |||
target_include_directories(ge_executor_shared PRIVATE | |||
@@ -26,7 +26,7 @@ | |||
#include "common/math/math_util.h" | |||
namespace { | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ | |||
case (DTYPE): { \ | |||
GeTensorPtr ge_tensor = nullptr; \ | |||
@@ -38,7 +38,7 @@ | |||
#include "graph/partition/stage_partition.h" | |||
#include "graph/passes/addn_pass.h" | |||
#include "graph/passes/bitcast_pass.h" | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
#include "graph/passes/assign_pass.h" | |||
#include "graph/passes/inplace_support_check_pass.h" | |||
#endif | |||
@@ -2241,7 +2241,7 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||
ReshapeRemovePass reshape_remove_pass; | |||
CondRemovePass condition_remove_pass; | |||
BitcastPass bitcast_pass; | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
AssignPass assign_pass; | |||
InplaceSupportCheckPass inplace_support_check_pass; | |||
#endif | |||
@@ -2249,7 +2249,7 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||
names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | |||
names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass); | |||
names_to_passes.emplace_back("BitcastPass", &bitcast_pass); | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
if (GetContext().GetHostExecFlag()) { | |||
names_to_passes.emplace_back("AssignPass", &assign_pass); | |||
names_to_passes.emplace_back("InplaceSupportCheckPass", &inplace_support_check_pass); | |||
@@ -19,7 +19,7 @@ | |||
#include <string> | |||
#include "graph/manager/graph_caching_allocator.h" | |||
#include "graph/manager/rdma_pool_allocator.h" | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
#include "graph/manager/host_mem_allocator.h" | |||
#endif | |||
namespace ge { | |||
@@ -192,7 +192,7 @@ Status MemManager::Initialize(const std::vector<rtMemType_t> &memory_type) { | |||
GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed."); | |||
return ge::INTERNAL_ERROR; | |||
} | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
if (InitAllocator(memory_type, host_allocator_map_) != SUCCESS) { | |||
GELOGE(ge::INTERNAL_ERROR, "Create HostMemAllocator failed."); | |||
return ge::INTERNAL_ERROR; | |||
@@ -219,7 +219,7 @@ void MemManager::Finalize() noexcept { | |||
// caching and rdma allocator use memory allocator, so finalize them first | |||
FinalizeAllocatorMap(caching_allocator_map_); | |||
FinalizeAllocatorMap(rdma_allocator_map_); | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
FinalizeAllocatorMap(host_allocator_map_); | |||
#endif | |||
FinalizeAllocatorMap(memory_allocator_map_); | |||
@@ -250,7 +250,7 @@ CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) { | |||
///
/// @brief Fetch the RDMA pool allocator associated with a memory type
/// @param [in] memory_type key handed to GetAllocator together with rdma_allocator_map_
/// @return reference to the RdmaPoolAllocator that GetAllocator yields for memory_type
/// @note Pure delegation: the lookup itself is done by Instance().GetAllocator;
///       how a missing entry is handled is decided there and is not visible here.
///
RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { | |||
return Instance().GetAllocator(memory_type, rdma_allocator_map_); | |||
} | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
///
/// @brief Fetch the host memory allocator associated with a memory type
/// @param [in] memory_type key handed to GetAllocator together with host_allocator_map_
/// @return reference to the HostMemAllocator that GetAllocator yields for memory_type
/// @note Pure delegation: the lookup itself is done by Instance().GetAllocator;
///       how a missing entry is handled is decided there and is not visible here.
///       This definition sits inside a preprocessor conditional, so it is only
///       available in builds where that condition holds.
///
HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) { | |||
return Instance().GetAllocator(memory_type, host_allocator_map_); | |||
} | |||
@@ -139,7 +139,7 @@ class MemoryAllocator { | |||
using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>; | |||
class CachingAllocator; | |||
class RdmaPoolAllocator; | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
class HostMemAllocator; | |||
#endif | |||
class MemManager { | |||
@@ -150,7 +150,7 @@ class MemManager { | |||
static MemoryAllocator *Instance(rtMemType_t memory_type); | |||
CachingAllocator &CachingInstance(rtMemType_t memory_type); | |||
RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
HostMemAllocator &HostMemInstance(rtMemType_t memory_type); | |||
#endif | |||
MemManager(const MemManager &) = delete; | |||
@@ -240,7 +240,7 @@ class MemManager { | |||
std::map<rtMemType_t, MemoryAllocator *> memory_allocator_map_; | |||
std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_; | |||
std::map<rtMemType_t, RdmaPoolAllocator *> rdma_allocator_map_; | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
std::map<rtMemType_t, HostMemAllocator *> host_allocator_map_; | |||
#endif | |||
std::recursive_mutex allocator_mutex_; | |||
@@ -43,7 +43,7 @@ Status SharedMemAllocator::Allocate(SharedMemInfo &mem_info) { | |||
return GE_GRAPH_MEMORY_ALLOC_FAILED; | |||
} | |||
mem_info.fd = output_para.fd; | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
mem_info.host_aligned_ptr = AlignedPtr::BuildAlignedPtr(mem_info.mem_size, | |||
[&output_para](std::unique_ptr<uint8_t[], deleter> &ptr) { | |||
GELOGD("set aligned_ptr, addr=%p", output_para.ptr); | |||
@@ -62,7 +62,7 @@ Status SharedMemAllocator::Allocate(SharedMemInfo &mem_info) { | |||
Status SharedMemAllocator::DeAllocate(SharedMemInfo &mem_info) { | |||
GELOGD("SharedMemAllocator::DeAllocate"); | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, | |||
mem_info.host_aligned_ptr->MutableGet(), mem_info.device_address}; | |||
#else | |||
@@ -42,7 +42,7 @@ struct SharedMemInfo { | |||
uint64_t mem_size = 0; | |||
int fd = 0; | |||
uint8_t *device_address = nullptr; | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
std::shared_ptr<AlignedPtr> host_aligned_ptr = nullptr; | |||
#else | |||
uint8_t *host_address = nullptr; | |||
@@ -26,7 +26,7 @@ const int32_t kAssignValueInputIndex = 1; | |||
} | |||
namespace ge { | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
Status AssignPass::Run(NodePtr &node) { | |||
GELOGD("AssignPass running"); | |||
@@ -25,7 +25,7 @@ class AssignPass : public BaseNodePass { | |||
Status Run(NodePtr &node) override; | |||
private: | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
/// | |||
/// @brief Optimize for assign_node | |||
/// @param [in] assign_node | |||
@@ -115,7 +115,7 @@ void ConstantFuseSamePass::GetFuseConstNodes(ComputeGraphPtr &graph, | |||
TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
continue; | |||
} | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
if ((type_size != 0) && (weight->MutableData().GetAlignedPtr() == nullptr)) { | |||
GELOGW("aligned_ptr is null while size is not 0"); | |||
continue; | |||
@@ -125,7 +125,7 @@ void ConstantFuseSamePass::GetFuseConstNodes(ComputeGraphPtr &graph, | |||
SameConstKey map_key; | |||
map_key.data_size = type_size; | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
map_key.aligned_ptr = weight->MutableData().GetAlignedPtr(); | |||
#else | |||
map_key.data = weight->GetData().GetData(); | |||
@@ -21,7 +21,7 @@ | |||
#include <set> | |||
#include <utility> | |||
#include <vector> | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
#include "graph/aligned_ptr.h" | |||
#endif | |||
#include "graph/types.h" | |||
@@ -30,7 +30,7 @@ | |||
namespace ge { | |||
struct SameConstKey { | |||
int data_size; | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
std::shared_ptr<AlignedPtr> aligned_ptr; | |||
#else | |||
const uint8_t *data; | |||
@@ -44,7 +44,7 @@ struct SameConstKey { | |||
if (data_size != key.data_size) { | |||
return data_size < key.data_size; | |||
} | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
if (data_size != 0) { | |||
int ret = memcmp(aligned_ptr->Get(), key.aligned_ptr->Get(), data_size); | |||
if (ret != 0) { | |||
@@ -37,7 +37,7 @@ | |||
#include "graph/passes/addn_pass.h" | |||
#include "graph/passes/aicpu_constant_folding_pass.h" | |||
#include "graph/passes/assert_pass.h" | |||
#if (ENABLE_OPEN_SRC == True) | |||
#ifdef ONLY_COMPILE_OPEN_SRC | |||
#include "graph/passes/assign_pass.h" | |||
#endif | |||
#include "graph/passes/common_subexpression_elimination_pass.h" | |||
@@ -1700,7 +1700,7 @@ Status GraphPrepare::PrepareOptimize() { | |||
VarIsInitializedOpPass var_is_initialized_pass; | |||
ParallelConcatStartOpPass parallel_concat_start_op_pass; | |||
IdentityPass identity_pass(false); | |||
#if (ENABLE_OPEN_SRC == True) | |||
#ifdef ONLY_COMPILE_OPEN_SRC | |||
AssignPass assign_pass; | |||
#endif | |||
SnapshotPass snapshot_pass; | |||
@@ -1717,7 +1717,7 @@ Status GraphPrepare::PrepareOptimize() { | |||
names_to_passes.emplace_back("VarIsInitializedOpPass", &var_is_initialized_pass); | |||
names_to_passes.emplace_back("ParallelConcatStartOpPass", ¶llel_concat_start_op_pass); | |||
names_to_passes.emplace_back("IdentityPass", &identity_pass); | |||
#if (ENABLE_OPEN_SRC == True) | |||
#ifdef ONLY_COMPILE_OPEN_SRC | |||
if (GetContext().GetHostExecFlag()) { | |||
names_to_passes.emplace_back("AssignPass", &assign_pass); | |||
} | |||
@@ -20,7 +20,7 @@ | |||
#include "graph/manager/graph_caching_allocator.h" | |||
#include "graph/manager/graph_mem_allocator.h" | |||
#include "graph/manager/rdma_pool_allocator.h" | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
#include "graph/manager/host_mem_allocator.h" | |||
#endif | |||
@@ -67,7 +67,7 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) { | |||
if (mem_type == RDMA_HBM) { | |||
buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(allocate_size, device_id_); | |||
} else if (mem_type == HOST_DDR) { | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
buffer = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(allocate_size); | |||
#else | |||
buffer = malloc(allocate_size); | |||
@@ -108,7 +108,7 @@ void NpuMemoryAllocator::Deallocate(void *data, MemStorageType mem_type) { | |||
if (mem_type == RDMA_HBM) { | |||
MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_); | |||
} else if (mem_type == HOST_DDR) { | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Free(data); | |||
#else | |||
free(data); | |||
@@ -24,7 +24,7 @@ | |||
#include "graph/manager/graph_var_manager.h" | |||
#include "graph/manager/host_mem_manager.h" | |||
#include "graph/manager/trans_var_data_utils.h" | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
#include "graph/manager/graph_mem_allocator.h" | |||
#include "graph/manager/host_mem_allocator.h" | |||
#endif | |||
@@ -853,7 +853,7 @@ Status HybridModelBuilder::InitConstantOps() { | |||
std::unique_ptr<TensorValue> var_tensor; | |||
if (GetContext().GetHostExecFlag()) { | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
GE_CHECK_NOTNULL(ge_tensor); | |||
// Address for eigen kernel should be aligned with 16 bytes | |||
// Tensors return by api GetWeights share data with proto, whose addr is not confirmed to be aligned | |||
@@ -925,7 +925,7 @@ Status HybridModelBuilder::InitVariableTensors() { | |||
GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str()); | |||
return GE_GRAPH_MALLOC_FAILED; | |||
} | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr, | |||
tensor_size) == nullptr) { | |||
GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed."); | |||
@@ -18,7 +18,7 @@ | |||
#include "hybrid/node_executor/host_cpu/kernel_factory.h" | |||
#include "graph/passes/folding_pass.h" | |||
#include "hybrid/model/hybrid_model.h" | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
#include "graph/manager/graph_mem_allocator.h" | |||
#include "graph/manager/host_mem_allocator.h" | |||
#endif | |||
@@ -54,7 +54,7 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { | |||
auto input_desc_ptr = context.GetInputDesc(i); | |||
GE_CHECK_NOTNULL(input_desc_ptr); | |||
const auto &input_desc = *input_desc_ptr; | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
auto tensor = context.GetInput(i); | |||
GE_CHECK_NOTNULL(tensor); | |||
auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); | |||
@@ -84,7 +84,7 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { | |||
} | |||
auto tensor = context.GetOutput(i); | |||
GE_CHECK_NOTNULL(tensor); | |||
#if (ENABLE_OPEN_SRC != True) | |||
#ifndef ONLY_COMPILE_OPEN_SRC | |||
auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); | |||
GE_CHECK_NOTNULL(item.second); | |||
auto out_tensor = MakeShared<GeTensor>(output_desc, item.second, item.first); | |||