Browse Source

No session in tf_singleop_task and add cpu kernels cache.

pull/410/head
unknown 4 years ago
parent
commit
d7158e800d
2 changed files with 31 additions and 21 deletions
  1. +28
    -18
      ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
  2. +3
    -3
      ge/single_op/single_op.cc

+ 28
- 18
ge/graph/load/new_model_manager/task_info/kernel_task_info.cc View File

@@ -74,7 +74,8 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
GELOGD("node[%s] is_n_batch_spilt %d", op_desc_->GetName().c_str(), is_n_batch_spilt_); GELOGD("node[%s] is_n_batch_spilt %d", op_desc_->GetName().c_str(), is_n_batch_spilt_);
(void)AttrUtils::GetInt(*op_desc_, ATTR_NAME_FUSION_GROUP_KEY, group_key_); (void)AttrUtils::GetInt(*op_desc_, ATTR_NAME_FUSION_GROUP_KEY, group_key_);
has_group_key_ = (group_key_ != kInvalidGroupKey); has_group_key_ = (group_key_ != kInvalidGroupKey);
GELOGD("node[%s] has_group_key_ %ld, group key is [%ld]", op_desc_->GetName().c_str(), has_group_key_, group_key_);
GELOGD("node[%s] has_group_key_ %d, group key is [%ld]", op_desc_->GetName().c_str(), has_group_key_, group_key_);

// fusion_op_info // fusion_op_info
vector<std::string> original_op_names; vector<std::string> original_op_names;
bool result = AttrUtils::GetListStr(op_desc_, ge::ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, original_op_names); bool result = AttrUtils::GetListStr(op_desc_, ge::ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, original_op_names);
@@ -217,7 +218,7 @@ Status KernelTaskInfo::SuperKernelLaunch() {
rtError_t rt_ret; rtError_t rt_ret;
auto &skt_kernel_list = skt_info_.kernel_list; auto &skt_kernel_list = skt_info_.kernel_list;
auto &skt_arg_list = skt_info_.arg_list; auto &skt_arg_list = skt_info_.arg_list;
GELOGI("SuperKernelLaunch: Skt_kernel_list size[%d] skt_arg_list[%d]", skt_kernel_list.size(), skt_arg_list.size());
GELOGI("SuperKernelLaunch: Skt_kernel_list size[%zu] skt_arg_list[%zu]", skt_kernel_list.size(), skt_arg_list.size());
if (skt_kernel_list.size() == kSKTSingleSize && skt_arg_list.size() == kSKTSingleSize) { if (skt_kernel_list.size() == kSKTSingleSize && skt_arg_list.size() == kSKTSingleSize) {
rt_ret = rtKernelLaunchWithFlag(skt_info_.kernel_list[0], static_cast<uint32_t>(skt_info_.last_block_dim), rt_ret = rtKernelLaunchWithFlag(skt_info_.kernel_list[0], static_cast<uint32_t>(skt_info_.last_block_dim),
skt_info_.arg_list[0], skt_info_.last_args_size, skt_info_.arg_list[0], skt_info_.last_args_size,
@@ -368,8 +369,9 @@ Status KernelTaskInfo::Distribute() {
GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_); GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_);
} }
rtError_t rt_ret = RT_ERROR_NONE; rtError_t rt_ret = RT_ERROR_NONE;
char *skt_enable_env = getenv("SKT_ENABLE");
int64_t env_flag = (skt_enable_env != nullptr) ? strtol(skt_enable_env, nullptr, 10) : 0;
char skt_enable_env[MMPA_MAX_PATH] = { 0x00 };
INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH);
int64_t env_flag = (res == EN_OK) ? strtol(skt_enable_env, nullptr, 10) : 0;
bool call_skt = ((env_flag != 0) || is_l1_fusion_enable_); bool call_skt = ((env_flag != 0) || is_l1_fusion_enable_);
if (kernel_type_ == cce::ccKernelType::AI_CPU || kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { if (kernel_type_ == cce::ccKernelType::AI_CPU || kernel_type_ == cce::ccKernelType::CUST_AI_CPU) {
GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_); GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_);
@@ -748,15 +750,15 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel
} }
} }
*(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[0])) = *(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[0])) =
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.input_descs)); // arg 0
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.input_descs)); // arg 0
*(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[1])) = *(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[1])) =
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.input_addrs)); // arg 1
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.input_addrs)); // arg 1
*(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[2])) = *(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[2])) =
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.output_descs)); // arg 2
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.output_descs)); // arg 2
*(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[3])) = *(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[3])) =
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.output_addrs)); // arg 3
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.output_addrs)); // arg 3
*(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[4])) = *(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[4])) =
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.attr_handle)); // arg 4
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.attr_handle)); // arg 4


rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
@@ -913,7 +915,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);


aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoAddr = static_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = reinterpret_cast<uintptr_t>(ext_info.size()); aicpu_param_head->extInfoLength = reinterpret_cast<uintptr_t>(ext_info.size());


// malloc device memory for args // malloc device memory for args
@@ -1151,18 +1153,24 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u
} }


GELOGI("FileName:%s, Path:%s.", file_name.c_str(), canonicalPath.c_str()); GELOGI("FileName:%s, Path:%s.", file_name.c_str(), canonicalPath.c_str());
auto handle = dlopen(canonicalPath.c_str(), RTLD_NOW | RTLD_GLOBAL);
auto handle = mmDlopen(canonicalPath.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL);
const char *error = "";
if (handle == nullptr) { if (handle == nullptr) {
GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", dlerror());
error = mmDlerror();
GE_IF_BOOL_EXEC(error == nullptr, error = "");
GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", error);
return FAILED; return FAILED;
} }
cce::ccStatus_t cc_ret; cce::ccStatus_t cc_ret;
std::string update_kernel_args = "ccUpdateKernelArgs";
auto cceUpdateKernelArgs = (cce::ccStatus_t(*)(cce::ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t, auto cceUpdateKernelArgs = (cce::ccStatus_t(*)(cce::ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t,
void *))dlsym(handle, "ccUpdateKernelArgs");
void *))mmDlsym(handle, const_cast<char *>(update_kernel_args.c_str()));
if (cceUpdateKernelArgs == nullptr) { if (cceUpdateKernelArgs == nullptr) {
GELOGE(FAILED, "Failed to invoke function ccUpdateKernelArgs"); GELOGE(FAILED, "Failed to invoke function ccUpdateKernelArgs");
if (dlclose(handle) != 0) {
GELOGW("Failed to close handle %s", dlerror());
if (mmDlclose(handle) != 0) {
error = mmDlerror();
GE_IF_BOOL_EXEC(error == nullptr, error = "");
GELOGW("Failed to close handle %s", error);
} }
return FAILED; return FAILED;
} else { } else {
@@ -1175,8 +1183,10 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u
const_cast<char *>(kernel_def.args().data()), args_size_, sm_contrl); const_cast<char *>(kernel_def.args().data()), args_size_, sm_contrl);
} }
} }
if (dlclose(handle) != 0) {
GELOGW("Failed to close handle %s", dlerror());
if (mmDlclose(handle) != 0) {
error = mmDlerror();
GE_IF_BOOL_EXEC(error == nullptr, error = "");
GELOGW("Failed to close handle %s", error);
return FAILED; return FAILED;
} }
if (cc_ret != cce::CC_STATUS_SUCCESS) { if (cc_ret != cce::CC_STATUS_SUCCESS) {
@@ -1217,7 +1227,7 @@ Status KernelTaskInfo::SetFlowtable(std::string &flowtable, const domi::KernelDe


*(reinterpret_cast<uint64_t *>( *(reinterpret_cast<uint64_t *>(
args + (reinterpret_cast<uint16_t *>(const_cast<char *>(context.args_offset().data())))[0])) = args + (reinterpret_cast<uint16_t *>(const_cast<char *>(context.args_offset().data())))[0])) =
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(flowtable_));
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(flowtable_));
} }
return SUCCESS; return SUCCESS;
} }


+ 3
- 3
ge/single_op/single_op.cc View File

@@ -57,7 +57,7 @@ Status SingleOp::ValidateArgs(const std::vector<DataBuffer> &inputs, const std::
for (size_t i = 0; i < num_inputs; ++i) { for (size_t i = 0; i < num_inputs; ++i) {
// preventing from read out of bound // preventing from read out of bound
size_t aligned_size = GetAlignedSize(inputs[i].length); size_t aligned_size = GetAlignedSize(inputs[i].length);
GELOGI("Input [%zu], aligned_size:%zu, inputs.length:%lu, input_sizes_:%lu",
GELOGI("Input [%zu], aligned_size:%zu, inputs.length:%lu, input_sizes_:%zu",
i, aligned_size, inputs[i].length, input_sizes_[i]); i, aligned_size, inputs[i].length, input_sizes_[i]);
if (aligned_size < input_sizes_[i]) { if (aligned_size < input_sizes_[i]) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Input size mismatch. index = %zu, model expect %zu," GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Input size mismatch. index = %zu, model expect %zu,"
@@ -75,7 +75,7 @@ Status SingleOp::ValidateArgs(const std::vector<DataBuffer> &inputs, const std::
for (size_t i = 0; i < num_outputs; ++i) { for (size_t i = 0; i < num_outputs; ++i) {
// preventing from write out of bound // preventing from write out of bound
size_t aligned_size = GetAlignedSize(outputs[i].length); size_t aligned_size = GetAlignedSize(outputs[i].length);
GELOGI("Output [%zu], aligned_size:%zu, outputs.length:%lu, output_sizes_:%lu",
GELOGI("Output [%zu], aligned_size:%zu, outputs.length:%lu, output_sizes_:%zu",
i, aligned_size, outputs[i].length, output_sizes_[i]); i, aligned_size, outputs[i].length, output_sizes_[i]);
if (aligned_size < output_sizes_[i]) { if (aligned_size < output_sizes_[i]) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Output size mismatch. index = %zu, model expect %zu," GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Output size mismatch. index = %zu, model expect %zu,"
@@ -141,7 +141,7 @@ Status SingleOp::UpdateArgs(const std::vector<DataBuffer> &inputs, const std::ve
GE_CHECK_NOTNULL(task_io_addr); GE_CHECK_NOTNULL(task_io_addr);
auto io_addr = reinterpret_cast<uint64_t *>(const_cast<uintptr_t *>(task_io_addr)); auto io_addr = reinterpret_cast<uint64_t *>(const_cast<uintptr_t *>(task_io_addr));
for (size_t i = 0; i < io_addr_num; ++i) { for (size_t i = 0; i < io_addr_num; ++i) {
io_addr[i] = reinterpret_cast<uintptr_t>(args_[i]);
io_addr[i] = static_cast<uintptr_t>(args_[i]);
} }
} else { } else {
GELOGW("Only TF_kernel aicpu and aicpu_CC are supported, but got %u", task->GetOpTaskType()); GELOGW("Only TF_kernel aicpu and aicpu_CC are supported, but got %u", task->GetOpTaskType());


Loading…
Cancel
Save