@@ -272,6 +272,7 @@ static void ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor>
std::vector<int64_t> dynamic_shape_dims = {kDynamicDimValue};
GeShape dynamic_shape(dynamic_shape_dims);
+ std::vector<std::pair<int64_t, int64_t>> dynamic_shape_range;
ge::GeTensor inputTensor;
ge::GeTensorDesc desc(input_desc);
@@ -280,6 +281,7 @@ static void ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor>
(void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const);
if (!is_const && shape_ori.GetDims().size() > 0) {
desc.SetShape(dynamic_shape);
+ desc.SetShapeRange(dynamic_shape_range);
}
inputTensor.SetTensorDesc(desc);
@@ -970,7 +970,7 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma
uint32_t parent_index = 0; // Ignore subgraph Data Node.
if (AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
GELOGI("Init zero copy by subgraph Data node: %s.", op_desc->GetName().c_str());
- return InitInputBatchLabel(node);
+ return SUCCESS;
}
data_op_list_.push_back(op_desc);
@@ -1011,10 +1011,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma
}
data_op_index++;
- if (InitInputZeroCopyNodes(node) != SUCCESS) {
- GELOGE(PARAM_INVALID, "Input zero copy nodes init failed!");
- return PARAM_INVALID;
- }
return SUCCESS;
}
@@ -1036,39 +1032,6 @@ void DavinciModel::AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_inde
}
}
- ///
- /// @ingroup ge
- /// @brief input zero copy node Initialize.
- /// @param [in] NodePtr: Data Op.
- /// @return Status
- ///
- Status DavinciModel::InitInputZeroCopyNodes(const NodePtr &node) {
- auto out_data_anchor = node->GetOutDataAnchor(kDataIndex);
- if (out_data_anchor == nullptr) {
- GELOGE(FAILED, "Out data anchor is nullptr");
- return FAILED;
- }
- for (auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) {
- auto node = peer_in_data_anchor->GetOwnerNode();
- auto op_desc = node->GetOpDesc();
- if (op_desc == nullptr) {
- GELOGE(FAILED, "Op desc is nullptr");
- return FAILED;
- }
- string batch_label;
- (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
- if (batch_label.empty()) {
- batch_label = kDefaultBatchLable;
- }
- if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) {
- zero_copy_op_id_batch_label_.emplace(pair<int64_t, string>(op_desc->GetId(), batch_label));
- GELOGD("Init input zero copy nodes success, op name:%s, op id: %ld, batch label: %s.", op_desc->GetName().c_str(),
- op_desc->GetId(), batch_label.c_str());
- }
- }
- return SUCCESS;
- }
bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) {
bool getnext_sink_dynamic = false;
if (ge::AttrUtils::GetBool(op_desc, ATTR_GETNEXT_SINK_DYNMAIC, getnext_sink_dynamic) && getnext_sink_dynamic) {
@@ -1094,7 +1057,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) {
if (owner_graph->GetParentGraph() != nullptr) {
GELOGI("Init zero copy by subgraph NetOutput node: %s.", op_desc->GetName().c_str());
op_list_.erase(op_desc->GetId());
- return InitOutputBatchLabel(node);
+ return SUCCESS;
}
output_op_list_.push_back(op_desc);
@@ -1146,8 +1109,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) {
}
}
- GE_IF_BOOL_EXEC(InitOutputZeroCopyNodes(node) != SUCCESS,
- GELOGE(PARAM_INVALID, "Output zero copy nodes init failed!"); return PARAM_INVALID;);
GetAllGearsInfo(node);
if (is_getnext_sink_dynamic_) {
GE_IF_BOOL_EXEC(GetGetDynamicDimsNodeInfo(node) != SUCCESS,
@@ -1343,121 +1304,6 @@ void DavinciModel::ParseDynamicOutShape(const std::vector<std::string> &str_info
}
}
- ///
- /// @ingroup ge
- /// @brief output zero copy node Initialize.
- /// @param [in] NodePtr: netoutput Op.
- /// @return Status
- ///
- Status DavinciModel::InitOutputZeroCopyNodes(const NodePtr &node) {
- set<NodePtr> nodes_need_record;
- for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
- auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
- if (peer_out_data_anchor == nullptr) {
- continue;
- }
- auto peer_node = peer_out_data_anchor->GetOwnerNode();
- nodes_need_record.emplace(peer_node);
- // Merge node output multiplexed input, upstream nodes need to be considered in multiple batch scenarios
- if (peer_node->GetType() == MERGE) {
- for (const auto &merge_peer_in_data_anchor : peer_node->GetAllInDataAnchors()) {
- auto merge_peer_out_data_anchor = merge_peer_in_data_anchor->GetPeerOutAnchor();
- if (merge_peer_out_data_anchor == nullptr) {
- continue;
- }
- auto merge_peer_node = merge_peer_out_data_anchor->GetOwnerNode();
- nodes_need_record.emplace(merge_peer_node);
- }
- } else {
- for (const auto &other_in_data_anchor : peer_out_data_anchor->GetPeerInDataAnchors()) {
- auto other_in_node = other_in_data_anchor->GetOwnerNode();
- if (other_in_node->GetType() != NETOUTPUT) {
- nodes_need_record.emplace(other_in_node);
- }
- }
- }
- }
- for (const auto &node_need_record : nodes_need_record) {
- auto op_desc = node_need_record->GetOpDesc();
- GE_CHECK_NOTNULL(op_desc);
- string batch_label;
- (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
- if (batch_label.empty()) {
- batch_label = kDefaultBatchLable;
- }
- if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) {
- zero_copy_op_id_batch_label_.emplace(pair<int64_t, string>(op_desc->GetId(), batch_label));
- GELOGD("Init Output zero copy nodes success, op name:%s, op id: %ld, batch label: %s.",
- op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str());
- }
- }
- return SUCCESS;
- }
- ///
- /// @ingroup ge
- /// @brief input zero copy node Initialize.
- /// @param [in] NodePtr: Data Op.
- /// @return Status
- ///
- Status DavinciModel::InitInputBatchLabel(const NodePtr &node) {
- string batch_label;
- if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) {
- return SUCCESS; // Not Multi-batch.
- }
- const auto &out_data_anchor = node->GetOutDataAnchor(kDataIndex);
- GE_CHECK_NOTNULL(out_data_anchor);
- for (const auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) {
- const auto &node = peer_in_data_anchor->GetOwnerNode();
- const auto &op_desc = node->GetOpDesc();
- GE_CHECK_NOTNULL(op_desc);
- if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) {
- zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label;
- GELOGD("Init input zero copy nodes success, op name: %s, op id: %ld, batch label: %s", op_desc->GetName().c_str(),
- op_desc->GetId(), batch_label.c_str());
- }
- }
- return SUCCESS;
- }
- ///
- /// @ingroup ge
- /// @brief output zero copy node Initialize for Case.
- /// @param [in] NodePtr: netoutput Op.
- /// @return Status
- ///
- Status DavinciModel::InitOutputBatchLabel(const NodePtr &node) {
- string batch_label;
- if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) {
- return SUCCESS; // Not Multi-batch.
- }
- for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
- const auto &peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
- if (peer_out_data_anchor == nullptr) {
- continue;
- }
- const auto &peer_node = peer_out_data_anchor->GetOwnerNode();
- const auto &op_desc = peer_node->GetOpDesc();
- GE_CHECK_NOTNULL(op_desc);
- if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) {
- zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label;
- GELOGD("Init Output zero copy nodes success, op name: %s, op id: %ld, batch label: %s",
- op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str());
- }
- }
- return SUCCESS;
- }
/// @ingroup ge
/// @brief LabelSet Op Initialize.
/// @param [in] op_desc: LabelSet Op descriptor.
@@ -3257,27 +3103,20 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v
for (auto &input_outside_addrs : new_input_outside_addrs_) {
ZeroCopyOffset &input_outside = input_outside_addrs.second;
- bool ret = input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
- if (ret) {
- void *args_val = static_cast<uint8_t *>(args) + offset + i * kAddrLen;
- SetBatchLabelAddr(op_desc, reinterpret_cast<uintptr_t>(args_val));
- }
+ input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
}
for (auto &output_outside_addrs : new_output_outside_addrs_) {
ZeroCopyOffset &output_outside = output_outside_addrs.second;
- bool ret = output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
- if (ret) {
- void *args_val = static_cast<uint8_t *>(args) + offset + i * kAddrLen;
- SetBatchLabelAddr(op_desc, reinterpret_cast<uintptr_t>(args_val));
- }
+ output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
}
}
- auto it = zero_copy_op_id_batch_label_.find(op_desc->GetId());
- if (it == zero_copy_op_id_batch_label_.end()) {
+ string batch_label;
+ if (!AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label) || batch_label.empty()) {
zero_copy_task.SetBatchLabel(kDefaultBatchLable);
} else {
- zero_copy_task.SetBatchLabel(it->second);
+ zero_copy_task.SetBatchLabel(batch_label);
}
std::lock_guard<std::mutex> lock(outside_addrs_mutex_);
@@ -3287,27 +3126,6 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v
}
}
- void DavinciModel::SetBatchLabelAddr(const OpDescPtr &op_desc, uintptr_t addr) {
- // Establish a mapping between batch label and zero copy address for multi-batch scenes
- auto it = zero_copy_op_id_batch_label_.find(op_desc->GetId());
- if (it == zero_copy_op_id_batch_label_.end()) {
- return;
- }
- const string &batch_label = it->second;
- auto iter = zero_copy_batch_label_addrs_.find(batch_label);
- if (iter != zero_copy_batch_label_addrs_.end()) {
- iter->second.insert(addr);
- GELOGD("[ZCPY] Set zero copy batch label and addrs success, batch label: %s, op name:%s.", batch_label.c_str(),
- op_desc->GetName().c_str());
- } else {
- set<uintptr_t> addrs = {addr};
- zero_copy_batch_label_addrs_.emplace(pair<string, set<uintptr_t>>(batch_label, addrs));
- GELOGD("[ZCPY] New added zero copy batch label and addrs success, batch label: %s, op name:%s.",
- batch_label.c_str(), op_desc->GetName().c_str());
- }
- }
///
/// @ingroup ge
/// @brief Copy Check input size and model op size.
@@ -3441,15 +3259,15 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &
void *addr = data.second.GetDataInfo().at(count).second;
void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data) +
data.second.GetRelativeOffset().at(count));
- GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p", input_or_output.c_str(),
- data.first, addr, size, buffer_addr);
+ GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p, batch_label: %s",
+ input_or_output.c_str(), data.first, addr, size, buffer_addr, batch_label.c_str());
// For input data, just copy for rts task.
for (ZeroCopyTask &task : zero_copy_tasks_) {
if (task.GetBatchLabel() != kDefaultBatchLable && task.GetBatchLabel() != batch_label) {
continue;
}
uintptr_t addr_val = reinterpret_cast<uintptr_t>(addr);
- if (task.UpdateTaskParam(addr_val, buffer_addr, zero_copy_batch_label_addrs_, batch_label) != SUCCESS) {
+ if (task.UpdateTaskParam(addr_val, buffer_addr) != SUCCESS) {
return FAILED;
}
}
@@ -3811,9 +3629,6 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
GELOGD("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_);
GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed.");
is_dynamic_ = input_data.is_dynamic_batch;
- if (!is_dynamic_) {
- zero_copy_batch_label_addrs_.clear();
- }
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_START));
Status ret = CopyModelData(input_data, output_data, is_dynamic_);
@@ -531,15 +531,6 @@ class DavinciModel {
///
/// @ingroup ge
- /// @brief Save Batch label Info.
- /// @param [in] const OpDescPtr &op_desc
- /// @param [in] uintptr_t addr: address value in args block.
- /// @return None.
- ///
- void SetBatchLabelAddr(const OpDescPtr &op_desc, uintptr_t addr);
- ///
- /// @ingroup ge
/// @brief Copy Check input size and model op size.
/// @param [in] const int64_t &input_size: input size.
/// @param [in] const int64_t &op_size: model op size.
@@ -651,14 +642,6 @@ class DavinciModel {
///
/// @ingroup ge
- /// @brief input zero copy node Initialize.
- /// @param [in] NodePtr: Data Op.
- /// @return Status
- ///
- Status InitInputZeroCopyNodes(const NodePtr &node);
- ///
- /// @ingroup ge
/// @brief NetOutput Op Initialize.
/// @param [in] NodePtr: NetOutput Op.
/// @return Status
@@ -667,30 +650,6 @@ class DavinciModel {
///
/// @ingroup ge
- /// @brief output zero copy node Initialize.
- /// @param [in] NodePtr: Data Op.
- /// @return Status
- ///
- Status InitOutputZeroCopyNodes(const NodePtr &node);
- ///
- /// @ingroup ge
- /// @brief input zero copy node Initialize for Case.
- /// @param [in] NodePtr: Data Op.
- /// @return Status
- ///
- Status InitInputBatchLabel(const NodePtr &node);
- ///
- /// @ingroup ge
- /// @brief output zero copy node Initialize for Case.
- /// @param [in] NodePtr: netoutput Op.
- /// @return Status
- ///
- Status InitOutputBatchLabel(const NodePtr &node);
- ///
- /// @ingroup ge
/// @brief Constant Op Init.
/// @return Status
///
@@ -914,11 +873,6 @@ class DavinciModel {
std::vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr.
std::set<const void *> copy_only_addrs_; // Address need copy to original place.
- // {op_id, batch_label}
- std::map<int64_t, std::string> zero_copy_op_id_batch_label_;
- // {batch_label, addrs}
- std::map<std::string, std::set<uintptr_t>> zero_copy_batch_label_addrs_;
std::vector<TaskInfoPtr> task_list_;
// rt_moodel_handle
rtModel_t rt_model_handle_;
@@ -183,22 +183,18 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo
addr_count_ = out_count;
}
- bool ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) {
+ void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) {
const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr);
- bool set_batch_label_flag = false;
for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) {
- auto &addrs_mapping_list = GetOutsideAddrs();
- auto args_addrs = addrs_mapping_list[out_count].find(outside_addr);
- if (args_addrs != addrs_mapping_list[out_count].end()) {
+ auto args_addrs = outside_addrs_[out_count].find(outside_addr);
+ if (args_addrs != outside_addrs_[out_count].end()) {
GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid.");
void *args_val = static_cast<uint8_t *>(args) + offset;
args_addrs->second.push_back(args_val);
GELOGD("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val,
args, offset);
- set_batch_label_flag = true;
}
}
- return set_batch_label_flag;
}
} // namespace ge
@@ -51,7 +51,7 @@ class ZeroCopyOffset {
const OpDescPtr &op_desc, const size_t &idx, bool &fusion_flag);
void SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr,
std::vector<void *> &tensor_addrs);
- bool SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset);
+ void SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset);
// basic_addr of l2-fusion
void *GetBasicAddr() const { return basic_addr_; }
@@ -22,8 +22,6 @@
#include "common/ge_compiler_options.h"
namespace ge {
- const char *const kDefaultBatchLable = "Batch_default";
ZeroCopyTask::ZeroCopyTask(const string &name, uint8_t *args, size_t size)
: name_(name), args_addr_(args), args_size_(size), is_updated_(false) {}
@@ -66,59 +64,18 @@ void ZeroCopyTask::SetOriginalArgs(const void *info, size_t size) {
const uint8_t *data = static_cast<const uint8_t *>(info);
args_info_.assign(data, data + size);
- GELOGI("[ZCPY] %s set info from virtual_addr: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info,
+ GELOGI("[ZCPY] %s set original args info: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info,
args_addr_, args_size_, size);
}
/**
* @ingroup ge
- * @brief Check is dynamic batch node.
- * @param [in] addr: virtual address value from Op.
- * @param [in] data: data buffer from user.
- * @param [in] batch_addrs: dynamic batch addr info.
- * @param [in] batch_label: batch label.
- * @return: true / false
- */
- bool ZeroCopyTask::CheckDynamicBatch(const map<string, set<uintptr_t>> &batch_addrs, const string &batch_label,
- uintptr_t addr) {
- // Used for dynamic batch / resolution scene
- set<uintptr_t> dynamic_input_addrs;
- auto dynamic_input_iter = batch_addrs.find(batch_label);
- if (dynamic_input_iter != batch_addrs.end()) {
- dynamic_input_addrs = dynamic_input_iter->second;
- }
- set<uintptr_t> fix_input_addrs;
- auto fix_input_iter = batch_addrs.find(kDefaultBatchLable);
- if (fix_input_iter != batch_addrs.end()) {
- fix_input_addrs = fix_input_iter->second;
- }
- if (fix_input_addrs.empty()) {
- if (!dynamic_input_addrs.empty() && dynamic_input_addrs.find(addr) == dynamic_input_addrs.end()) {
- return false;
- }
- } else {
- if (!dynamic_input_addrs.empty() && dynamic_input_addrs.find(addr) == dynamic_input_addrs.end() &&
- fix_input_addrs.find(addr) == fix_input_addrs.end()) {
- return false;
- }
- }
- return true;
- }
- /**
- * @ingroup ge
* @brief Set user data addr to Task param.
* @param [in] addr: virtual address value from Op.
* @param [in] buffer_addr: real_data_buffer_addr from user.
- * @param [in] batch_addrs: dynamic batch addr info.
- * @param [in] batch_label: batch label.
* @return: void
*/
- Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map<string, set<uintptr_t>> &batch_addrs,
- const string &batch_label) {
+ Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr) {
auto iter = task_addr_offset_.find(addr);
if (iter != task_addr_offset_.end()) {
auto &cur_pair = *iter;
@@ -550,7 +550,8 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
(void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy);
}
std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this,
- compute_graph->GetGraphID(), subgraph, compute_graph, session_id,
+ compute_graph->GetGraphID(), subgraph,
+ compute_graph->GetName(), session_id,
GetThreadLocalContext());
if (!f.valid()) {
GELOGE(FAILED, "Future is invalid");
@@ -565,7 +566,8 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
(void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy);
}
std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this,
- compute_graph->GetGraphID(), subgraph, compute_graph, session_id,
+ compute_graph->GetGraphID(), subgraph,
+ compute_graph->GetName(), session_id,
GetThreadLocalContext());
if (!f.valid()) {
GELOGE(FAILED, "Future is invalid");
@@ -2471,7 +2473,8 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra
Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id,
const SubGraphInfoPtr &sub_graph_info_ptr,
- const ComputeGraphPtr &compute_graph, uint64_t session_id,
+ const std::string &root_graph_name,
+ uint64_t session_id,
const GEThreadLocalContext &ge_context) {
if (sub_graph_info_ptr != nullptr && graph_manager != nullptr) {
GetContext().SetSessionId(session_id);
@@ -2488,9 +2491,13 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager
GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_ID for subgraph, graph_id: %u.", root_graph_id);
return FAILED;
}
+ if (!AttrUtils::SetStr(*compute_graph_tmp, ATTR_NAME_ROOT_GRAPH_NAME, root_graph_name)) {
+ GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_NAME for subgraph, \
+ root_graph_name: %s.", root_graph_name.c_str());
+ return FAILED;
+ }
compute_graph_tmp->SetSessionID(session_id);
Status ret = graph_manager->GetCompilerStages(root_graph_id).optimizer.OptimizeSubGraph(compute_graph_tmp,
- compute_graph,
engine_name);
if (ret != SUCCESS) {
GELOGE(ret, "SubGraph optimize Failed %s", engine_name.c_str());
@@ -219,7 +219,8 @@ class GraphManager {
static Status ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id,
const SubGraphInfoPtr &sub_graph_info_ptr,
- const ComputeGraphPtr &compute_graph, uint64_t session_id,
+ const std::string &root_graph_name,
+ uint64_t session_id,
const GEThreadLocalContext &ge_context);
Status ParseInputsDims(const std::vector<InputTensorInfo> &input_tensor);
void ParseInputsDimsForData(const std::vector<InputTensorInfo> &input_tensor);
@@ -76,8 +76,7 @@ void AddNodeInputProperty(ComputeGraphPtr &compute_graph) {
}
}
- Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const ComputeGraphPtr &parent_graph,
- const std::string &engine_name) {
+ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std::string &engine_name) {
if (compute_graph == nullptr) {
GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeSubGraph]: compute_graph is nullptr.");
return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL;
@@ -106,10 +105,6 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const Com
for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) {
Status ret = (*iter)->OptimizeFusedGraphAfterGraphSlice(*(compute_graph));
if (ret != SUCCESS) {
- auto root_graph = ge::GraphUtils::FindRootGraph(parent_graph);
- if (root_graph != nullptr) {
- ErrorManager::GetInstance().SaveMstuneCompileFailedMsg(root_graph->GetName());
- }
GELOGE(ret, "[OptimizeSubGraph][OptimizeFusedGraphAfterGraphSlice]: graph optimize failed, ret:%d", ret);
return ret;
}
@@ -42,8 +42,7 @@ class GraphOptimize {
~GraphOptimize() = default;
// subgraph optimize
- Status OptimizeSubGraph(ComputeGraphPtr &compute_graph, const ComputeGraphPtr &parent_graph,
- const std::string &engine_name);
+ Status OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std::string &engine_name);
// original graph optimize
Status OptimizeOriginalGraph(ComputeGraphPtr &compute_graph);
@@ -18,6 +18,7 @@
#include <map>
#include <set>
#include <string>
+ #include <utility>
#include "common/formats/format_transfers/format_transfer_fractal_nz.h"
#include "common/formats/format_transfers/format_transfer_fractal_z.h"
#include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h"
@@ -27,9 +28,13 @@
#include "common/helper/model_helper.h"
#include "common/math/math_util.h"
#include "common/op/ge_op_utils.h"
+ #include "common/util/error_manager/error_manager.h"
+ #include "common/formats/utils/formats_trans_utils.h"
+ #include "framework/common/debug/ge_log.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/common/local_context.h"
#include "graph/common/transop_util.h"
+ #include "graph/debug/ge_attr_define.h"
#include "graph/ge_context.h"
#include "graph/shape_refiner.h"
#include "graph/manager/graph_var_manager.h"
@@ -39,21 +44,29 @@
#include "graph/passes/aicpu_constant_folding_pass.h"
#include "graph/passes/assert_pass.h"
#include "graph/passes/assign_pass.h"
+ #include "graph/passes/base_pass.h"
#include "graph/passes/common_subexpression_elimination_pass.h"
#include "graph/passes/cond_pass.h"
#include "graph/passes/cond_remove_pass.h"
#include "graph/passes/constant_folding_pass.h"
+ #include "graph/passes/constant_fuse_same_pass.h"
+ #include "graph/passes/control_trigger_pass.h"
#include "graph/passes/dimension_adjust_pass.h"
#include "graph/passes/dimension_compute_pass.h"
#include "graph/passes/dropout_pass.h"
#include "graph/passes/enter_pass.h"
+ #include "graph/passes/flow_ctrl_pass.h"
#include "graph/passes/for_pass.h"
+ #include "graph/passes/get_original_format_pass.h"
#include "graph/passes/guarantee_const_pass.h"
#include "graph/passes/hccl_group_pass.h"
#include "graph/passes/hccl_memcpy_pass.h"
#include "graph/passes/identity_pass.h"
#include "graph/passes/infershape_pass.h"
+ #include "graph/passes/iterator_op_pass.h"
+ #include "graph/passes/merge_pass.h"
#include "graph/passes/net_output_pass.h"
+ #include "graph/passes/next_iteration_pass.h"
#include "graph/passes/no_use_reshape_remove_pass.h"
#include "graph/passes/parallel_concat_start_op_pass.h"
#include "graph/passes/placeholder_with_default_pass.h"
@@ -68,18 +81,45 @@
#include "graph/passes/shape_operate_op_remove_pass.h"
#include "graph/passes/snapshot_pass.h"
#include "graph/passes/stop_gradient_pass.h"
+ #include "graph/passes/subgraph_pass.h"
+ #include "graph/passes/switch_data_edges_bypass.h"
+ #include "graph/passes/switch_dead_branch_elimination.h"
+ #include "graph/passes/switch_logic_remove_pass.h"
+ #include "graph/passes/merge_to_stream_merge_pass.h"
+ #include "graph/passes/switch_to_stream_switch_pass.h"
+ #include "graph/passes/attach_stream_label_pass.h"
#include "graph/passes/unused_const_pass.h"
+ #include "graph/passes/unused_op_remove_pass.h"
#include "graph/passes/var_is_initialized_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h"
#include "graph/preprocess/insert_op/util_insert_aipp_op.h"
+ #include "graph/types.h"
+ #include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "inc/pass_manager.h"
#include "init/gelib.h"
#include "multi_batch_copy_graph.h"
+ #include "runtime/dev.h"
+ #include "graph/passes/dimension_adjust_pass.h"
+ #include "graph/passes/link_gen_mask_nodes_pass.h"
+ #include "graph/passes/permute_pass.h"
+ #include "graph/passes/reshape_remove_pass.h"
+ #include "graph/passes/same_transdata_breadth_fusion_pass.h"
+ #include "graph/passes/transop_breadth_fusion_pass.h"
+ #include "graph/passes/transop_depth_fusion_pass.h"
+ #include "graph/passes/transop_nearby_allreduce_fusion_pass.h"
+ #include "graph/passes/cast_remove_pass.h"
#include "graph/passes/data_pass.h"
+ #include "graph/passes/transop_without_reshape_fusion_pass.h"
+ #include "graph/passes/transpose_transdata_pass.h"
+ #include "graph/passes/variable_op_pass.h"
+ #include "graph/passes/variable_prepare_op_pass.h"
+ #include "graph/passes/variable_ref_delete_op_pass.h"
#include "graph/passes/mark_agnostic_pass.h"
namespace ge {
namespace {
static std::map<std::string, ge::DataType> output_type_str_to_datatype = {
@@ -22,6 +22,7 @@
#include "common/blocking_queue.h"
#include "common/properties_manager.h"
#include "framework/common/debug/ge_log.h"
+ #include "graph/ge_local_context.h"
#include "hybrid/common/npu_memory_allocator.h"
#include "hybrid/common/tensor_value.h"
#include "hybrid/executor/hybrid_profiler.h"
@@ -38,6 +39,7 @@ struct GraphExecutionContext {
uint64_t session_id = 0;
const HybridModel *model = nullptr;
+ const GEThreadLocalContext *ge_context = nullptr;
rtStream_t stream = nullptr;
rtContext_t rt_context = nullptr;
rtContext_t rt_gen_context = nullptr;
@@ -95,6 +95,7 @@ Status HybridModelExecutor::InitExecutionContext() {
context_.stream = stream_;
context_.model = model_;
context_.session_id = ::ge::GetContext().SessionId();
+ context_.ge_context = &GetThreadLocalContext();
GELOGD("session id from model = %lu, from context = %lu", model_->GetSessionId(), context_.session_id);
context_.allocator = NpuMemoryAllocator::GetAllocator(device_id_);
GE_CHECK_NOTNULL(context_.allocator);
@@ -26,6 +26,9 @@ Status TaskCompileEngine::Compile(NodeState &node_state, GraphExecutionContext *
RECORD_COMPILE_EVENT(context, node_item.NodeName().c_str(), "[Compile] Start");
GE_CHK_RT_RET(rtCtxSetCurrent(context->rt_gen_context));
+ if (context->ge_context != nullptr) {
+ GetThreadLocalContext() = *context->ge_context;
+ }
shared_ptr<NodeTask> kernel_task;
auto ret = node_item.node_executor->CompileTask(*context->model, node_item.node, kernel_task);
RECORD_COMPILE_EVENT(context, node_state.GetName().c_str(), "[Compile] End");
@@ -39,7 +39,7 @@ size_t GetAlignedSize(size_t size) {
}
Status ProfilingTaskInfo(OpTask *op_task) {
- if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) {
+ if (!ProfilingManager::Instance().ProfilingModelLoadOn()) {
return SUCCESS;
}
@@ -119,11 +119,11 @@ Status OpTask::DoUpdateArgTable(const SingleOpModelParam &param, bool keep_works
uintptr_t *arg_base = nullptr;
size_t arg_num = 0;
GetIoAddr(arg_base, arg_num);
- if (arg_num != all_addresses.size()) {
- GELOGE(INTERNAL_ERROR, "[%s] arg number mismatches, expect = %zu, but got = %zu",
+ if (arg_num < all_addresses.size()) {
+ GELOGE(INTERNAL_ERROR, "[%s] arg number mismatches, expect at least = %zu, but got = %zu",
op_desc_->GetName().c_str(),
- arg_num,
- all_addresses.size());
+ all_addresses.size(),
+ arg_num);
return INTERNAL_ERROR;
}
@@ -392,19 +392,9 @@ const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT,
OP_BANK_PATH};
// for interface: aclgrphParse
- const std::set<std::string> ir_parser_suppported_options = {INPUT_FORMAT,
- INPUT_SHAPE,
- OP_NAME_MAP,
- IS_DYNAMIC_INPUT,
- INPUT_FP16_NODES,
- IS_INPUT_ADJUST_HW_LAYOUT,
- IS_OUTPUT_ADJUST_HW_LAYOUT,
- OUTPUT,
- OUTPUT_TYPE,
- OUT_NODES,
- COMPRESS_WEIGHT_CONF,
- ENABLE_SCOPE_FUSION_PASSES,
- LOG_LEVEL};
+ const std::set<std::string> ir_parser_suppported_options = {
+ INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT,
+ OUT_NODES, COMPRESS_WEIGHT_CONF, ENABLE_SCOPE_FUSION_PASSES};
// for interface: aclgrphBuildInitialize
const std::set<std::string> global_options = {CORE_TYPE,
@@ -1 +1 @@
- Subproject commit 5a1b0ab95e2d205ee9ee578ac4bcde4f4fbed6d8
+ Subproject commit a71110f5e42dc768ddbbd51289eb467518dedf9b
@@ -1 +1 @@
- Subproject commit 77dc42c383e416ed4a0f606ddc3c02cdaa082ac3
+ Subproject commit 2e55b1168df38cd3c76412a8d00bc8b6e7f19f82