diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
index c39f25a4..3dca8661 100755
--- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc
@@ -29,8 +29,7 @@ bool IsNoOp(const NodeItem &node_item) {
     const auto &tensor_desc = node_item.MutableOutputDesc(i);
     GE_CHECK_NOTNULL(tensor_desc);
     const auto &shape = tensor_desc->MutableShape();
-    if (shape.IsScalar() || shape.GetShapeSize() > 0 ||
-        (node_item.shape_inference_type == DEPEND_SHAPE_RANGE)) {
+    if (shape.IsScalar() || shape.GetShapeSize() > 0 || (node_item.shape_inference_type == DEPEND_SHAPE_RANGE)) {
       return false;
     }
   }
@@ -220,28 +219,28 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
   }
 
-  auto callback = [=, &context]() {
-    Status callback_ret = SUCCESS;
-    if (!tasks_.empty()) {
-      // only last task need update outputs shape
-      auto task = tasks_.back().get();
-      if (task->GetUnknownShapeOpType() == DEPEND_SHAPE_RANGE) {
+  auto callback = done_callback;
+  if (!tasks_.empty()) {
+    // only last task need update outputs shape
+    auto task = tasks_.back().get();
+    if (task->GetUnknownShapeOpType() == DEPEND_SHAPE_RANGE) {
+      callback = [=, &context]() {
+        Status callback_ret = SUCCESS;
         GELOGD("Node[%s] need update outputs shape.", context.GetNodeName());
         callback_ret = task->UpdateOutputsShape(context);
-      }
+        if (done_callback != nullptr) {
+          context.SetStatus(callback_ret);
+          done_callback();
+        }
+      };
     }
-    if (done_callback != nullptr) {
-      context.SetStatus(callback_ret);
-      done_callback();
-    }
-  };
-
-  RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(),
-                         "[AiCoreNodeRegisterCallback] Start");
-  GE_CHK_STATUS_RET_NOLOG(context.RegisterCallback(callback));
-  RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(),
-                         "[AiCoreNodeRegisterCallback] End");
+  }
 
+  if (callback != nullptr) {
+    RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeRegisterCallback] Start");
+    GE_CHK_STATUS_RET_NOLOG(context.RegisterCallback(callback));
+    RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeRegisterCallback] End");
+  }
   GELOGD("[%s] ExecuteAsync End.", context.GetNodeName());
   RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeTaskExecuteAsync] End");
   return SUCCESS;
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
index 83cec5f9..b85ad8fe 100644
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
@@ -37,6 +37,9 @@ constexpr char const *kAttrOpParamSize = "op_para_size";
 constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size";
 const string kAtomicOpType = "DynamicAtomicAddrClean";
 std::atomic<std::uint64_t> log_id(0);
+const uint32_t kMaxDimNum = 8;
+// Per-output record: dim_num, dim1, ..., dim8 => 9 * sizeof(uint32_t) = 36 bytes
+const size_t kShapeBufferSize = sizeof(uint32_t) * (1 + kMaxDimNum);
 }  // namespace
 
 TbeHandleHolder::TbeHandleHolder(void *bin_handle)
@@ -53,36 +56,27 @@ bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) {
   return ret.second;
 }
 
-Status AiCoreOpTask::Init(const OpDesc &op_desc,
-                          const domi::TaskDef &task_def) {
+Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) {
   GE_CHK_STATUS_RET_NOLOG(DoInit(op_desc, task_def));
   int32_t unknown_shape_op_type_val = static_cast<int32_t>(DEPEND_IN_SHAPE);
-  (void)AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE,
-                          unknown_shape_op_type_val);
-  unknown_shape_op_type_ =
-      static_cast<UnknowShapeOpType>(unknown_shape_op_type_val);
-  GELOGD("Op [%s] unknown shape type is %d", op_desc.GetName().c_str(),
-         unknown_shape_op_type_);
+  (void)AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_op_type_val);
+  unknown_shape_op_type_ = static_cast<UnknowShapeOpType>(unknown_shape_op_type_val);
+  GELOGD("Op [%s] unknown shape type is %d", op_desc.GetName().c_str(), unknown_shape_op_type_);
   if (unknown_shape_op_type_ == DEPEND_SHAPE_RANGE) {
-    // size,dim1,...,dim8: 9*4=36
-    const size_t kDefaultShapeSize = 36;
-    size_t size = kDefaultShapeSize * op_desc.GetOutputsSize();
+    size_t size = kShapeBufferSize * op_desc.GetOutputsSize();  // one fixed-size shape record per output
     if (size == 0) {
-      GELOGE(PARAM_INVALID,
-             "Op [%s] unknown shape type is %d, but outputs size is 0.",
-             op_desc.GetName().c_str(), unknown_shape_op_type_);
+      GELOGE(PARAM_INVALID, "Op [%s] unknown shape type is %d, but outputs size is 0.", op_desc.GetName().c_str(),
+             unknown_shape_op_type_);
       return PARAM_INVALID;
     }
     auto allocator = NpuMemoryAllocator::GetAllocator();
     GE_CHECK_NOTNULL(allocator);
     shape_buffer_ = TensorBuffer::Create(allocator, size);
     GE_CHECK_NOTNULL(shape_buffer_);
-    GELOGD("Op [%s] allocate memory for outputs shape success, size=%zu",
-           op_desc.GetName().c_str(), size);
-    vector<char> default_value(size, 0);
-    GE_CHK_RT_RET(rtMemcpy(shape_buffer_->GetData(), shape_buffer_->GetSize(),
-                           default_value.data(), size,
-                           RT_MEMCPY_HOST_TO_DEVICE));
+    GELOGD("Op [%s] allocate memory for outputs shape success, size=%zu", op_desc.GetName().c_str(), size);
+    GE_CHK_RT_RET(rtMemset(shape_buffer_->GetData(), shape_buffer_->GetSize(), 0, size));
+    host_shape_buffer_.reset(new (std::nothrow) uint8_t[shape_buffer_->GetSize()]);  // reused by UpdateOutputsShape
+    GE_CHECK_NOTNULL(host_shape_buffer_);
   }
   return SUCCESS;
 }
@@ -121,84 +115,76 @@ Status AiCoreOpTask::DoInit(const OpDesc &op_desc,
 Status AiCoreOpTask::UpdateOutputsShape(TaskContext &context) const {
+  // Copies the per-output shape records from shape_buffer_ into host_shape_buffer_ and, for every output
+  // whose dim_num field is non-zero, applies the reported dims through UpdateShapeToOutputDesc.
   GELOGD("Node[%s] start update outputs shape.", context.GetNodeName());
   GE_CHECK_NOTNULL(shape_buffer_);
-  auto outputs_shape_buffer =
-      std::unique_ptr<uint8_t[]>(new uint8_t[shape_buffer_->GetSize()]);
-  GE_CHK_RT_RET(rtMemcpy(outputs_shape_buffer.get(), shape_buffer_->GetSize(),
-                         shape_buffer_->GetData(), shape_buffer_->GetSize(),
-                         RT_MEMCPY_DEVICE_TO_HOST));
+  GE_CHECK_NOTNULL(host_shape_buffer_);
+  GE_CHK_RT_RET(rtMemcpy(host_shape_buffer_.get(), shape_buffer_->GetSize(), shape_buffer_->GetData(),
+                         shape_buffer_->GetSize(), RT_MEMCPY_DEVICE_TO_HOST));
   int num_outputs = context.NumOutputs();
-  auto outputs_shape =
-      reinterpret_cast<uint32_t(*)[num_outputs]>(outputs_shape_buffer.get());
+  // Each output owns one record of (1 + kMaxDimNum) uint32_t values: [dim_num, dim1, ..., dim8].
+  // View the buffer as rows of that record width so that outputs_shape[i] is the record of output i.
+  auto outputs_shape = reinterpret_cast<uint32_t(*)[1 + kMaxDimNum]>(host_shape_buffer_.get());
   for (int i = 0; i < num_outputs; ++i) {
     if (outputs_shape[i][0] != 0) {
       uint32_t dim_num = outputs_shape[i][0];
-      const uint32_t kMaxDimNum = 8;
       GE_CHECK_LE(dim_num, kMaxDimNum);
       vector<int64_t> dims;
-      for (uint32_t j = 0; j < dim_num; ++j) {
+      // Slot 0 holds dim_num, so the dims themselves occupy slots 1..dim_num.
+      for (uint32_t j = 1; j <= dim_num; ++j) {
         dims.emplace_back(static_cast<int64_t>(outputs_shape[i][j]));
       }
       auto shape_new = GeShape(dims);
-      GELOGD("Node[%s] output[%d] shape:%s.", context.GetNodeName(), i,
-             ToString(dims).c_str());
+      GELOGD("Node[%s] output[%d] shape:%s.", context.GetNodeName(), i, ToString(dims).c_str());
       GE_CHK_STATUS_RET_NOLOG(UpdateShapeToOutputDesc(context, shape_new, i));
     }
   }
   return SUCCESS;
 }
 
-Status AiCoreOpTask::UpdateShapeToOutputDesc(TaskContext &context,
-                                             const GeShape &shape,
-                                             const int output_index) const {
+// Records `shape` as the new shape of output `output_index` via the node state. If the output's format
+// equals its origin format, `shape` is stored as the origin shape too; otherwise the origin shape is
+// obtained by converting `shape` with formats::TransShape before the update.
+Status AiCoreOpTask::UpdateShapeToOutputDesc(TaskContext &context, const GeShape &shape, const int output_index) const {
   auto output_desc = context.MutableOutputDesc(output_index);
   GE_CHECK_NOTNULL(output_desc);
   auto shape_old = output_desc->GetShape();
   auto origin_shape_old = output_desc->GetOriginShape();
-  GELOGD(
-      "Node[%s] try to update output[%d] shape from %s to %s, origin_shape "
-      "from %s to %s.",
-      context.GetNodeName(), output_index, shape_old.ToString().c_str(),
-      shape.ToString().c_str(), origin_shape_old.ToString().c_str(),
-      shape.ToString().c_str());
   auto origin_format = output_desc->GetOriginFormat();
   auto format = output_desc->GetFormat();
   auto node_state = context.GetNodeState();
   GE_CHECK_NOTNULL(node_state);
   if (origin_format == format) {
-    GE_CHK_STATUS_RET(
-        node_state->UpdateOutputShapes(output_index, shape, shape),
-        "Node[%s] try to update output[%d] shape from %s to %s, origin_shape "
-        "from %s to %s failed.",
-        context.GetNodeName(), output_index, shape_old.ToString().c_str(),
-        shape.ToString().c_str(), origin_shape_old.ToString().c_str(),
-        shape.ToString().c_str());
+    GELOGD(
+        "Node[%s] try to update output[%d] shape from [%s] to [%s], origin_shape "
+        "from [%s] to [%s].",
+        context.GetNodeName(), output_index, shape_old.ToString().c_str(), shape.ToString().c_str(),
+        origin_shape_old.ToString().c_str(), shape.ToString().c_str());
+    GE_CHK_STATUS_RET(node_state->UpdateOutputShapes(output_index, shape, shape),
+                      "Node[%s] try to update output[%d] shape from [%s] to [%s], origin_shape "
+                      "from [%s] to [%s] failed.",
+                      context.GetNodeName(), output_index, shape_old.ToString().c_str(), shape.ToString().c_str(),
+                      origin_shape_old.ToString().c_str(), shape.ToString().c_str());
     return SUCCESS;
   }
   // if format is not same need convert shape
   std::vector<int64_t> origin_dims_new;
   auto trans_ret =
-      formats::TransShape(format, shape.GetDims(), output_desc->GetDataType(),
-                          origin_format, origin_dims_new);
-  GE_CHK_STATUS_RET(
-      trans_ret,
-      "[Trans][Shape] failed for node[%s] output[%d], origin_format[%d] "
-      "is not same as format[%d], shape=%s.",
-      context.GetNodeName(), output_index, origin_format, format,
-      shape.ToString().c_str());
+      formats::TransShape(format, shape.GetDims(), output_desc->GetDataType(), origin_format, origin_dims_new);
+  GE_CHK_STATUS_RET(trans_ret,
+                    "[Trans][Shape] failed for node[%s] output[%d], origin_format[%d] "
+                    "is not same as format[%d], shape=[%s].",
+                    context.GetNodeName(), output_index, origin_format, format, shape.ToString().c_str());
   auto origin_shape_new = GeShape(origin_dims_new);
-  GE_CHK_STATUS_RET(
-      node_state->UpdateOutputShapes(output_index, shape, origin_shape_new),
-      "Node[%s] try to update output[%d] shape from %s to %s, origin_shape "
-      "from %s to %s failed.",
-      context.GetNodeName(), output_index, shape_old.ToString().c_str(),
-      shape.ToString().c_str(), origin_shape_old.ToString().c_str(),
-      origin_shape_new.ToString().c_str());
+  GE_CHK_STATUS_RET(node_state->UpdateOutputShapes(output_index, shape, origin_shape_new),
+                    "Node[%s] try to update output[%d] shape from [%s] to [%s], origin_shape "
+                    "from [%s] to [%s] failed.",
+                    context.GetNodeName(), output_index, shape_old.ToString().c_str(), shape.ToString().c_str(),
+                    origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str());
   GELOGD(
-      "Node[%s] update output[%d] shape from %s to %s, origin_shape "
-      "from %s to %s.",
-      context.GetNodeName(), output_index, shape_old.ToString().c_str(),
-      shape.ToString().c_str(), origin_shape_old.ToString().c_str(),
-      origin_shape_new.ToString().c_str());
+      "Node[%s] update output[%d] shape from [%s] to [%s], origin_shape "
+      "from [%s] to [%s].",
+      context.GetNodeName(), output_index, shape_old.ToString().c_str(), shape.ToString().c_str(),
+      origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str());
   return SUCCESS;
 }
 
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h
index 68d1ed55..a436c9c1 100755
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.h
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h
@@ -133,6 +133,7 @@ class AiCoreOpTask {
   std::string op_type_;
   UnknowShapeOpType unknown_shape_op_type_ = DEPEND_IN_SHAPE;
   std::unique_ptr<TensorBuffer> shape_buffer_ = nullptr;
+  std::unique_ptr<uint8_t[]> host_shape_buffer_ = nullptr;
 };
 
 class AtomicAddrCleanOpTask : public AiCoreOpTask {