From c28af94028507c4a66854372b9c102210516443c Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Thu, 16 Sep 2021 20:51:19 +0800 Subject: [PATCH] upgarde ascend 0916 --- inc/external/acl/OWNERS | 1 + inc/external/acl/acl_mdl.h | 6 +- inc/external/acl/acl_rt.h | 39 +- inc/external/acl/acl_tdt_queue.h | 426 +++++++++++++++++++++ inc/external/acl/error_codes/rt_error_codes.h | 4 + inc/external/acl/ops/acl_dvpp.h | 2 +- inc/external/ge/ge_api_error_codes.h | 20 +- inc/external/ge/ge_api_types.h | 1 + inc/external/runtime/rt_error_codes.h | 3 + inc/framework/common/debug/ge_log.h | 88 +++-- inc/framework/common/debug/log.h | 140 +++---- inc/framework/common/ge_inner_error_codes.h | 42 +- inc/framework/common/ge_types.h | 17 +- inc/framework/common/op/ge_op_utils.h | 2 +- inc/framework/common/string_util.h | 4 +- inc/framework/common/types.h | 2 + inc/framework/common/util.h | 204 +++++----- inc/framework/engine/dnnengine.h | 23 +- inc/framework/generator/ge_generator.h | 13 +- inc/framework/omg/version.h | 2 +- metadef | 2 +- .../inc/external/runtime/rt_error_codes.h | 219 +++++------ third_party/fwkacllib/inc/ops/array_ops.h | 8 +- third_party/fwkacllib/inc/ops/cluster.h | 58 +++ .../fwkacllib/inc/ops/elewise_calculation_ops.h | 54 +-- third_party/fwkacllib/inc/ops/linalg_ops.h | 8 +- third_party/fwkacllib/inc/ops/math_ops.h | 51 +++ .../fwkacllib/inc/ops/matrix_calculation_ops.h | 50 ++- third_party/fwkacllib/inc/ops/nn_calculation_ops.h | 249 ++++++------ third_party/fwkacllib/inc/ops/nn_detect_ops.h | 4 +- third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h | 42 ++ third_party/fwkacllib/inc/ops/pad_ops.h | 32 ++ third_party/fwkacllib/inc/ops/random_ops.h | 20 +- third_party/fwkacllib/inc/ops/reduce_ops.h | 11 +- third_party/fwkacllib/inc/ops/rnn.h | 56 ++- third_party/fwkacllib/inc/ops/selection_ops.h | 36 +- third_party/fwkacllib/inc/ops/transformation_ops.h | 4 +- third_party/fwkacllib/inc/ops/vector_search.h | 83 +++- third_party/fwkacllib/inc/runtime/base.h | 16 +- third_party/fwkacllib/inc/runtime/config.h | 59 +-- third_party/fwkacllib/inc/runtime/context.h | 6 +- third_party/fwkacllib/inc/runtime/dev.h | 39 +- third_party/fwkacllib/inc/runtime/dvfsprofile.h | 6 +- third_party/fwkacllib/inc/runtime/event.h | 18 +- third_party/fwkacllib/inc/runtime/kernel.h | 55 +-- third_party/fwkacllib/inc/runtime/mem.h | 119 +++--- third_party/fwkacllib/inc/runtime/rt.h | 6 +- third_party/fwkacllib/inc/runtime/rt_ffts.h | 10 +- third_party/fwkacllib/inc/runtime/rt_ffts_plus.h | 10 +- .../fwkacllib/inc/runtime/rt_ffts_plus_define.h | 14 +- third_party/fwkacllib/inc/runtime/rt_mem_queue.h | 416 ++++++++++++++++++++ third_party/fwkacllib/inc/runtime/rt_model.h | 24 +- third_party/fwkacllib/inc/runtime/rt_stars.h | 8 +- .../fwkacllib/inc/runtime/rt_stars_define.h | 6 +- third_party/fwkacllib/inc/runtime/stream.h | 30 +- third_party/fwkacllib/inc/toolchain/prof_acl_api.h | 48 +-- 56 files changed, 2141 insertions(+), 775 deletions(-) create mode 100644 inc/external/acl/acl_tdt_queue.h create mode 100644 third_party/fwkacllib/inc/ops/cluster.h create mode 100644 third_party/fwkacllib/inc/runtime/rt_mem_queue.h diff --git a/inc/external/acl/OWNERS b/inc/external/acl/OWNERS index 8552e853..b4b22068 100755 --- a/inc/external/acl/OWNERS +++ b/inc/external/acl/OWNERS @@ -5,5 +5,6 @@ approvers: reviewers: - justin_zhao - zhangyongfeng88 +- w00267184 options: no_parent_owners: true \ No newline at end of file diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h index 522dbd38..778fa519 100644 --- 
a/inc/external/acl/acl_mdl.h +++ b/inc/external/acl/acl_mdl.h @@ -869,7 +869,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet, * * @see aclmdlCreateAIPP */ -ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0, +ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t cscSwitch, int16_t cscMatrixR0C0, int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0, int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0, int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, @@ -1106,7 +1106,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a * * @param modelId [IN] model id * @param index [IN] index of tensor - * @param aippinfo [OUT] Pointer for static aipp info + * @param aippInfo [OUT] Pointer for static aipp info * * @retval ACL_SUCCESS The function is successfully executed. * @retval ACL_ERROR_MODEL_AIPP_NOT_EXIST The tensor of index is not configured with aipp @@ -1115,7 +1115,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a * @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | * aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName */ -ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo); +ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippInfo); /** * @ingroup AscendCL diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h index 50dbc34d..7ea27cba 100644 --- a/inc/external/acl/acl_rt.h +++ b/inc/external/acl/acl_rt.h @@ -541,7 +541,7 @@ ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event); * * @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream */ -ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end); +ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent startEvent, aclrtEvent endEvent); /** * @ingroup AscendCL @@ -733,6 +733,43 @@ ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const v /** * @ingroup AscendCL + * @brief synchronous memory replication of two-dimensional matrix between host and device + * + * @param dst [IN] destination address pointer + * @param dpitch [IN] pitch of destination memory + * @param src [IN] source address pointer + * @param spitch [IN] pitch of source memory + * @param width [IN] width of matrix transfer + * @param height [IN] height of matrix transfer + * @param kind [IN] memcpy type + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemcpy2d(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, + size_t height, aclrtMemcpyKind kind); + +/** + * @ingroup AscendCL + * @brief asynchronous memory replication of two-dimensional matrix between host and device + * + * @param dst [IN] destination address pointer + * @param dpitch [IN] pitch of destination memory + * @param src [IN] source address pointer + * @param spitch [IN] pitch of source memory + * @param width [IN] width of matrix transfer + * @param height [IN] height of matrix transfer + * @param kind [IN] memcpy type + * @param stream [IN] asynchronized task stream + * + * @retval ACL_SUCCESS The function is successfully executed. 
+ * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtMemcpy2dAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, + size_t height, aclrtMemcpyKind kind, aclrtStream stream); + +/** + * @ingroup AscendCL * @brief Asynchronous initialize memory * and set contents of memory to specified value async * diff --git a/inc/external/acl/acl_tdt_queue.h b/inc/external/acl/acl_tdt_queue.h new file mode 100644 index 00000000..d47213d2 --- /dev/null +++ b/inc/external/acl/acl_tdt_queue.h @@ -0,0 +1,426 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_ +#define INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_ + +#include "acl/acl_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ACL_TDT_QUEUE_PERMISSION_MANAGE 1 +#define ACL_TDT_QUEUE_PERMISSION_DEQUEUE 2 +#define ACL_TDT_QUEUE_PERMISSION_ENQUEUE 4 + +typedef void *acltdtBuf; +typedef struct tagMemQueueAttr acltdtQueueAttr; +typedef struct acltdtQueueRouteList acltdtQueueRouteList; +typedef struct acltdtQueueRouteQueryInfo acltdtQueueRouteQueryInfo; +typedef struct acltdtQueueRoute acltdtQueueRoute; + +typedef enum { ACL_TDT_QUEUE_NAME_PTR = 0, ACL_TDT_QUEUE_DEPTH_UINT32 } acltdtQueueAttrType; + +typedef enum { + ACL_TDT_QUEUE_ROUTE_SRC_UINT32 = 0, + ACL_TDT_QUEUE_ROUTE_DST_UINT32, + ACL_TDT_QUEUE_ROUTE_STATUS_INT32 +} acltdtQueueRouteParamType; + +typedef enum { + ACL_TDT_QUEUE_ROUTE_QUERY_SRC = 0, + ACL_TDT_QUEUE_ROUTE_QUERY_DST, + ACL_TDT_QUEUE_ROUTE_QUERY_SRC_AND_DST +} acltdtQueueRouteQueryMode; + +typedef enum { + ACL_TDT_QUEUE_ROUTE_QUERY_MODE_ENUM = 0, + ACL_TDT_QUEUE_ROUTE_QUERY_SRC_ID_UINT32, + ACL_TDT_QUEUE_ROUTE_QUERY_DST_ID_UINT32 +} acltdtQueueRouteQueryInfoParamType; + +/** + * @ingroup AscendCL + * @brief create queue + * + * @param attr [IN] pointer to the queue attr + * @param qid [OUT] pointer to the qid + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtDestroyQueue + */ +ACL_FUNC_VISIBILITY aclError acltdtCreateQueue(const acltdtQueueAttr *attr, uint32_t *qid); + +/** + * @ingroup AscendCL + * @brief destroy queue + * + * @param qid [IN] qid which to be destroyed + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueue + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueue(uint32_t qid); + +/** + * @ingroup AscendCL + * @brief enqueue function + * + * @param qid [IN] qid + * @param buf [IN] acltdtBuf + * @param timeout [IN] timeout + * + * @retval ACL_SUCCESS The function is successfully executed. 
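The aclrtMemcpy2d / aclrtMemcpy2dAsync declarations added to acl_rt.h above mirror the existing aclrtMemcpy / aclrtMemcpyAsync pair but add row pitches for two-dimensional transfers. A minimal usage sketch (not part of the patch), assuming the usual ACL_MEMCPY_HOST_TO_DEVICE value from the existing aclrtMemcpyKind enum in acl_rt.h:

#include "acl/acl_rt.h"

// Copy a widthBytes x height tile from a pitched host buffer into a pitched
// device buffer with the new synchronous 2D copy; dpitch/spitch are the byte
// strides between consecutive rows of the destination and source buffers.
aclError CopyTileToDevice(void *devDst, size_t devPitch, const void *hostSrc, size_t hostPitch,
                          size_t widthBytes, size_t height) {
  return aclrtMemcpy2d(devDst, devPitch, hostSrc, hostPitch, widthBytes, height, ACL_MEMCPY_HOST_TO_DEVICE);
}

The asynchronous variant takes the same arguments plus an aclrtStream and returns before the copy completes, so the stream has to be synchronized before the destination is read.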
+ * @retval OtherValues Failure + * + * @see acltdtDequeue + */ +ACL_FUNC_VISIBILITY aclError acltdtEnqueue(uint32_t qid, acltdtBuf buf, int32_t timeout); + +/** + * @ingroup AscendCL + * @brief dequeue function + * + * @param qid [IN] qid + * @param buf [OUT] pointer to the acltdtBuf + * @param timeout [IN] timeout + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtEnqueue + */ +ACL_FUNC_VISIBILITY aclError acltdtDequeue(uint32_t qid, acltdtBuf *buf, int32_t timeout); + +/** + * @ingroup AscendCL + * @brief grant queue to other process + * + * @param qid [IN] qid + * @param pid [IN] pid of dst process + * @param permission [IN] permission of queue + * @param timeout [IN] timeout + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see ACL_TDT_QUEUE_PERMISSION_MANAGE | ACL_TDT_QUEUE_PERMISSION_DEQUEUE | ACL_TDT_QUEUE_PERMISSION_ENQUEUE + */ +ACL_FUNC_VISIBILITY aclError acltdtGrantQueue(uint32_t qid, int32_t pid, uint32_t permission, int32_t timeout); + +/** + * @ingroup AscendCL + * @brief attach queue in current process + * + * @param qid [IN] qid + * @param timeout [IN] timeout + * @param permission [OUT] permission of queue + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtGrantQueue + */ +ACL_FUNC_VISIBILITY aclError acltdtAttachQueue(uint32_t qid, int32_t timeout, uint32_t *permission); + +/** + * @ingroup AscendCL + * @brief bind queue routes + * + * @param qRouteList [IN|OUT] pointer to the route list + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acltdtBindQueueRoutes(acltdtQueueRouteList *qRouteList); + +/** + * @ingroup AscendCL + * @brief unbind queue routes + * + * @param qRouteList [IN|OUT] pointer to the route list + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acltdtUnbindQueueRoutes(acltdtQueueRouteList *qRouteList); + +/** + * @ingroup AscendCL + * @brief query queue routes according to query mode + * + * @param queryInfo [IN] pointer to the queue route query info + * @param qRouteList [IN|OUT] pointer to the route list + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acltdtQueryQueueRoutes(const acltdtQueueRouteQueryInfo *queryInfo, + acltdtQueueRouteList *qRouteList); + +/** + * @ingroup AscendCL + * @brief alloc acltdtBuf + * + * @param size [IN] alloc buf size + * @param buf [OUT] pointer to the acltdtBuf + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtFreeBuf + */ +ACL_FUNC_VISIBILITY aclError acltdtAllocBuf(size_t size, acltdtBuf *buf); + +/** + * @ingroup AscendCL + * @brief free acltdtBuf + * + * @param buf [IN] pointer to the acltdtBuf + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtAllocBuf + */ +ACL_FUNC_VISIBILITY aclError acltdtFreeBuf(acltdtBuf buf); + +/** + * @ingroup AscendCL + * @brief get data buf address + * + * @param buf [IN] acltdtBuf + * @param dataPtr [OUT] pointer to the data ptr which is acquired from acltdtBuf + * @param size [OUT] pointer to the size + * + * @retval ACL_SUCCESS The function is successfully executed. 
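Taken together, the enqueue/dequeue and buffer functions above form a small producer/consumer loop. A hedged sketch (not part of the patch): qid is assumed to come from an earlier acltdtCreateQueue call, the timeout is assumed to be in milliseconds, and buffer ownership after a successful enqueue is assumed to pass to the consumer, since the header does not spell out either point.

#include "acl/acl_tdt_queue.h"

aclError RoundTrip(uint32_t qid) {
  acltdtBuf buf = nullptr;
  aclError ret = acltdtAllocBuf(256U, &buf);   // allocate a 256-byte transport buffer
  if (ret != ACL_SUCCESS) {
    return ret;
  }
  ret = acltdtEnqueue(qid, buf, 1000);         // producer side, wait up to the timeout
  if (ret != ACL_SUCCESS) {
    (void)acltdtFreeBuf(buf);                  // enqueue failed, buffer is still ours to free
    return ret;
  }
  acltdtBuf out = nullptr;
  ret = acltdtDequeue(qid, &out, 1000);        // consumer side
  if (ret == ACL_SUCCESS) {
    ret = acltdtFreeBuf(out);                  // consumer releases the dequeued buffer
  }
  return ret;
}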
+ * @retval OtherValues Failure + * + * @see acltdtAllocBuf + */ +ACL_FUNC_VISIBILITY aclError acltdtGetBufData(const acltdtBuf buf, void **dataPtr, size_t *size); + +/** + * @ingroup AscendCL + * @brief Create the queue attr + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyQueueAttr + */ +ACL_FUNC_VISIBILITY acltdtQueueAttr *acltdtCreateQueueAttr(); + +/** + * @ingroup AscendCL + * @brief Destroy the queue attr + * + * @param attr [IN] pointer to the queue attr + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueAttr + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueAttr(const acltdtQueueAttr *attr); + +/** + * @ingroup AscendCL + * @brief Set parameter for queue attr + * + * @param attr [IN|OUT] pointer to the queue attr + * @param type [IN] parameter type + * @param len [IN] parameter length + * @param param [IN] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + * + * @see acltdtCreateQueueAttr + */ +ACL_FUNC_VISIBILITY aclError acltdtSetQueueAttr(acltdtQueueAttr *attr, acltdtQueueAttrType type, size_t len, + const void *param); + +/** + * @ingroup AscendCL + * + * @brief Get parameter for queue attr. + * + * @param attr [IN] pointer to the queue attr + * @param type [IN] parameter type + * @param len [IN] parameter length + * @param paramRetSize [OUT] pointer to parameter real length + * @param param [OUT] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + * + * @see acltdtCreateQueueAttr + */ +ACL_FUNC_VISIBILITY aclError acltdtGetQueueAttr(const acltdtQueueAttr *attr, acltdtQueueAttrType type, size_t len, + size_t *paramRetSize, void *param); + +/** + * @ingroup AscendCL + * @brief Create the queue route + * + * @param srcId [IN] src id of queue route + * @param dstId [IN] dst id of queue route + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyQueueRoute + */ +ACL_FUNC_VISIBILITY acltdtQueueRoute *acltdtCreateQueueRoute(uint32_t srcId, uint32_t dstId); + +/** + * @ingroup AscendCL + * @brief Destroy the queue attr + * + * @param route [IN] pointer to the queue route + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueRoute + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRoute(const acltdtQueueRoute *route); + +/** + * @ingroup AscendCL + * + * @brief Get parameter for queue route. + * + * @param route [IN] pointer to the queue route + * @param type [IN] parameter type + * @param len [IN] parameter length + * @param paramRetSize [OUT] pointer to parameter real length + * @param param [OUT] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + * + * @see acltdtCreateQueueRoute + */ +ACL_FUNC_VISIBILITY aclError acltdtGetQueueRouteParam(const acltdtQueueRoute *route, acltdtQueueRouteParamType type, + size_t len, size_t *paramRetSize, void *param); + +/** + * @ingroup AscendCL + * @brief Create the queue route list + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyQueueRouteList + */ +ACL_FUNC_VISIBILITY acltdtQueueRouteList *acltdtCreateQueueRouteList(); + +/** + * @ingroup AscendCL + * @brief Destroy the queue route list + * + * @param routeList [IN] pointer to the queue route list + * + * @retval ACL_SUCCESS The function is successfully executed. 
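acltdtCreateQueue consumes an attribute handle built through the attr helpers above. A sketch under stated assumptions: passing the name as a pointer-to-pointer with len = sizeof(const char *) is only inferred from the _PTR suffix of ACL_TDT_QUEUE_NAME_PTR, and "demo_queue" and the depth of 8 are made-up values.

#include "acl/acl_tdt_queue.h"

bool CreateNamedQueue(uint32_t *qid) {
  acltdtQueueAttr *attr = acltdtCreateQueueAttr();       // returns null on failure
  if (attr == nullptr) {
    return false;
  }
  const char *name = "demo_queue";                       // hypothetical queue name
  uint32_t depth = 8U;                                   // hypothetical queue depth
  const bool ok = (acltdtSetQueueAttr(attr, ACL_TDT_QUEUE_NAME_PTR, sizeof(name), &name) == ACL_SUCCESS) &&
                  (acltdtSetQueueAttr(attr, ACL_TDT_QUEUE_DEPTH_UINT32, sizeof(depth), &depth) == ACL_SUCCESS) &&
                  (acltdtCreateQueue(attr, qid) == ACL_SUCCESS);
  (void)acltdtDestroyQueueAttr(attr);                    // the attr is only needed for creation
  return ok;
}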
+ * @retval OtherValues Failure + * + * @see acltdtCreateQueueRouteList + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRouteList(const acltdtQueueRouteList *routeList); + +/** + * @ingroup AscendCL + * @brief add queue route to the route list + * + * @param routeList [IN|OUT] pointer to the queue route list + * @param route [IN] pointer to the queue route + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueRouteList | acltdtCreateQueueRoute + * + */ +ACL_FUNC_VISIBILITY aclError acltdtAddQueueRoute(acltdtQueueRouteList *routeList, const acltdtQueueRoute *route); + +/** + * @ingroup AscendCL + * @brief get queue route from route list + * + * @param routeList [IN] pointer to the queue route list + * @param index [IN] index of queue route in route list + * @param route [IN|OUT] pointer to the queue route + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueRouteList | acltdtCreateQueueRoute + * + */ +ACL_FUNC_VISIBILITY aclError acltdtGetQueueRoute(const acltdtQueueRouteList *routeList, size_t index, + acltdtQueueRoute *route); + +/** + * @ingroup AscendCL + * @brief Create the queue route query info + * + * @retval null for failed + * @retval OtherValues success + * + * @see acltdtDestroyQueueRouteQueryInfo + */ +ACL_FUNC_VISIBILITY acltdtQueueRouteQueryInfo *acltdtCreateQueueRouteQueryInfo(); + +/** + * @ingroup AscendCL + * @brief Destroy the queue route query info + * + * @param info [IN] pointer to the queue route info + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acltdtCreateQueueRouteQueryInfo + * + */ +ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRouteQueryInfo(const acltdtQueueRouteQueryInfo *info); + +/** + * @ingroup AscendCL + * @brief Set parameter for queue route info + * + * @param attr [IN|OUT] pointer to the queue route info + * @param type [IN] parameter type + * @param len [IN] parameter length + * @param param [IN] pointer to parameter value + * + * @retval ACL_SUCCESS for success, other for failure + * + * @see acltdtCreateQueueRouteQueryInfo + */ +ACL_FUNC_VISIBILITY aclError acltdtSetQueueRouteQueryInfo(acltdtQueueRouteQueryInfo *param, + acltdtQueueRouteQueryInfoParamType type, size_t len, + const void *value); + +#ifdef __cplusplus +} +#endif + +#endif // INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_ \ No newline at end of file diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h index 1c196c48..556652be 100644 --- a/inc/external/acl/error_codes/rt_error_codes.h +++ b/inc/external/acl/error_codes/rt_error_codes.h @@ -56,6 +56,10 @@ static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event res static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource +static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource +static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit +static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty +static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error 
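The route functions above connect one queue to another through a route list. A minimal sketch (not part of the patch); releasing the route and list handles right after acltdtBindQueueRoutes is an assumption, since the header does not state ownership rules:

#include "acl/acl_tdt_queue.h"

bool BindOneRoute(uint32_t srcQid, uint32_t dstQid) {
  acltdtQueueRoute *route = acltdtCreateQueueRoute(srcQid, dstQid);
  acltdtQueueRouteList *list = acltdtCreateQueueRouteList();
  const bool ok = (route != nullptr) && (list != nullptr) &&
                  (acltdtAddQueueRoute(list, route) == ACL_SUCCESS) &&
                  (acltdtBindQueueRoutes(list) == ACL_SUCCESS);
  if (list != nullptr) {
    (void)acltdtDestroyQueueRouteList(list);
  }
  if (route != nullptr) {
    (void)acltdtDestroyQueueRoute(route);
  }
  return ok;
}

Unbinding follows the same shape with acltdtUnbindQueueRoutes, and acltdtQueryQueueRoutes fills a route list according to an acltdtQueueRouteQueryInfo built with the query-info helpers.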
diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h index 5418ebd3..a536a23b 100644 --- a/inc/external/acl/ops/acl_dvpp.h +++ b/inc/external/acl/ops/acl_dvpp.h @@ -125,7 +125,7 @@ enum acldvppPixelFormat { enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL }; // Supported Channel Mode -enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 }; +enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4, DVPP_CHNMODE_PNGD = 8 }; // Supported Border Type enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 }; diff --git a/inc/external/ge/ge_api_error_codes.h b/inc/external/ge/ge_api_error_codes.h index d0d7981e..2512de0a 100644 --- a/inc/external/ge/ge_api_error_codes.h +++ b/inc/external/ge/ge_api_error_codes.h @@ -72,17 +72,23 @@ class GE_FUNC_VISIBILITY StatusFactory { class GE_FUNC_VISIBILITY ErrorNoRegisterar { public: - ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } - ErrorNoRegisterar(uint32_t err, const char *desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } + ErrorNoRegisterar(uint32_t err, const std::string &desc) { + StatusFactory::Instance()->RegisterErrorNo(err, desc); + } + ErrorNoRegisterar(uint32_t err, const char *desc) { + StatusFactory::Instance()->RegisterErrorNo(err, desc); + } ~ErrorNoRegisterar() {} }; // Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit -#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ - constexpr ge::Status name = \ - ((0xFF & (static_cast(runtime))) << 30) | ((0xFF & (static_cast(type))) << 28) | \ - ((0xFF & (static_cast(level))) << 25) | ((0xFF & (static_cast(sysid))) << 17) | \ - ((0xFF & (static_cast(modid))) << 12) | (0x0FFF & (static_cast(value))); \ +#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ + constexpr ge::Status name = (static_cast(0xFFU & (static_cast(runtime))) << 30) | \ + (static_cast(0xFFU & (static_cast(type))) << 28) | \ + (static_cast(0xFFU & (static_cast(level))) << 25) | \ + (static_cast(0xFFU & (static_cast(sysid))) << 17) | \ + (static_cast(0xFFU & (static_cast(modid))) << 12) | \ + (static_cast(0x0FFFU) & (static_cast(value))); \ const ErrorNoRegisterar g_##name##_errorno(name, desc); #define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_##name##_errorno(name, desc); diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index ac821281..9ee63797 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -67,6 +67,7 @@ const char *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOp const char *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput"; const char *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode"; const char *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; +const char *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr"; // Option key: memory init const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h index ef7e2ec7..1a8dc3e9 100644 --- a/inc/external/runtime/rt_error_codes.h +++ b/inc/external/runtime/rt_error_codes.h @@ -58,6 +58,9 @@ static const int32_t 
ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream re static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource +static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit +static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty +static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error diff --git a/inc/framework/common/debug/ge_log.h b/inc/framework/common/debug/ge_log.h index 3e646440..5ee2daee 100644 --- a/inc/framework/common/debug/ge_log.h +++ b/inc/framework/common/debug/ge_log.h @@ -33,7 +33,7 @@ extern "C" { #endif -#define GE_MODULE_NAME static_cast(GE) +#define GE_MODULE_NAME static_cast(GE) // trace status of log enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; @@ -51,43 +51,61 @@ class GE_FUNC_VISIBILITY GeLog { }; inline bool IsLogEnable(int module_name, int log_level) { - int32_t enable = CheckLogLevel(module_name, log_level); + const int32_t enable = CheckLogLevel(module_name, log_level); // 1:enable, 0:disable return (enable == 1); } -#define GELOGE(ERROR_CODE, fmt, ...) \ - dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %d(%s) %s" fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ - ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ - ##__VA_ARGS__) -#define GELOGW(fmt, ...) \ - if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) \ - dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GELOGI(fmt, ...) \ - if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) \ - dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GELOGD(fmt, ...) \ - if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) \ - dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) - -#define GEEVENT(fmt, ...) dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) - -#define GELOGT(VALUE, fmt, ...) \ - do { \ - TraceStatus stat = VALUE; \ - const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ - int idx = static_cast(stat); \ - char *k = const_cast("status"); \ - char *v = const_cast(TraceStatStr[idx]); \ - KeyValue kv = {k, v}; \ - DlogWithKV(static_cast(GE_MODULE_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, \ - ##__VA_ARGS__); \ - } while (0) - -#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ - dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) %s" fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ - ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ - ##__VA_ARGS__) +#define GELOGE(ERROR_CODE, fmt, ...) \ + do { \ + dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \ + ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ + ##__VA_ARGS__); \ + } while (false) + +#define GELOGW(fmt, ...) \ + do { \ + if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) { \ + dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ + } \ + } while (false) + +#define GELOGI(fmt, ...) 
\ + do { \ + if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) { \ + dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ + } \ + } while (false) + +#define GELOGD(fmt, ...) \ + do { \ + if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) { \ + dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ + } \ + } while (false) + +#define GEEVENT(fmt, ...) \ + do { \ + dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ + } while (false) + +#define GELOGT(VALUE, fmt, ...) \ + do { \ + TraceStatus stat = VALUE; \ + const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ + const int32_t idx = static_cast(stat); \ + char *k = const_cast("status"); \ + char *v = const_cast(TraceStatStr[idx]); \ + KeyValue kv = {k, v}; \ + DlogWithKV(GE_MODULE_NAME, DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ + } while (false) + +#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ + do { \ + dlog_error(MOD_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \ + ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ + ##__VA_ARGS__); \ + } while (false) // print memory when it is greater than 1KB. #define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \ @@ -95,7 +113,7 @@ inline bool IsLogEnable(int module_name, int log_level) { if ((SIZE) > 1024) { \ GELOGI("MallocMemory, func=%s, size=%zu, purpose=%s", (#FUNC), static_cast(SIZE), (PURPOSE)); \ } \ - } while (0); + } while (false) #ifdef __cplusplus } #endif diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index f06faa1b..2fe425c9 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -52,82 +52,82 @@ GELOGW(__VA_ARGS__); \ } -#define GE_LOGE_IF(condition, ...) \ - if ((condition)) { \ - DOMI_LOGE(__VA_ARGS__); \ +#define GE_LOGE_IF(condition, ...) \ + if ((condition)) { \ + GELOGE(ge::FAILED, __VA_ARGS__); \ } // If expr is not SUCCESS, print the log and return the same value -#define GE_CHK_STATUS_RET(expr, ...) \ - do { \ - const ge::Status _status = (expr); \ - if (_status != ge::SUCCESS) { \ - DOMI_LOGE(__VA_ARGS__); \ - return _status; \ - } \ - } while (0); +#define GE_CHK_STATUS_RET(expr, ...) \ + do { \ + const ge::Status _chk_status = (expr); \ + if (_chk_status != ge::SUCCESS) { \ + GELOGE(ge::FAILED, __VA_ARGS__); \ + return _chk_status; \ + } \ + } while (false) // If expr is not SUCCESS, print the log and do not execute return -#define GE_CHK_STATUS(expr, ...) \ - do { \ - const ge::Status _status = (expr); \ - if (_status != ge::SUCCESS) { \ - DOMI_LOGE(__VA_ARGS__); \ - } \ - } while (0); +#define GE_CHK_STATUS(expr, ...) \ + do { \ + const ge::Status _chk_status = (expr); \ + if (_chk_status != ge::SUCCESS) { \ + GELOGE(ge::FAILED, __VA_ARGS__); \ + } \ + } while (false) // If expr is not SUCCESS, return the same value -#define GE_CHK_STATUS_RET_NOLOG(expr) \ - do { \ - const ge::Status _status = (expr); \ - if (_status != ge::SUCCESS) { \ - return _status; \ - } \ - } while (0); +#define GE_CHK_STATUS_RET_NOLOG(expr) \ + do { \ + const ge::Status _chk_status = (expr); \ + if (_chk_status != ge::SUCCESS) { \ + return _chk_status; \ + } \ + } while (false) // If expr is not GRAPH_SUCCESS, print the log and return FAILED #define GE_CHK_GRAPH_STATUS_RET(expr, ...) 
\ do { \ if ((expr) != ge::GRAPH_SUCCESS) { \ REPORT_CALL_ERROR("E19999", "Operator graph failed"); \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ return FAILED; \ } \ - } while (0); + } while (false) // If expr is not SUCCESS, print the log and execute a custom statement -#define GE_CHK_STATUS_EXEC(expr, exec_expr, ...) \ - do { \ - const ge::Status _status = (expr); \ - GE_CHK_BOOL_EXEC(_status == SUCCESS, exec_expr, __VA_ARGS__); \ - } while (0); +#define GE_CHK_STATUS_EXEC(expr, exec_expr, ...) \ + do { \ + const ge::Status _chk_status = (expr); \ + GE_CHK_BOOL_EXEC(_chk_status == SUCCESS, exec_expr, __VA_ARGS__); \ + } while (false) // If expr is not true, print the log and return the specified status #define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \ do { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ REPORT_INNER_ERROR("E19999", __VA_ARGS__); \ GELOGE(_status, __VA_ARGS__); \ return _status; \ } \ - } while (0); + } while (false) // If expr is not true, print the log and return the specified status #define GE_CHK_BOOL_RET_STATUS_NOLOG(expr, _status, ...) \ do { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ return _status; \ } \ - } while (0); + } while (false) // If expr is not true, print the log and execute a custom statement #define GE_CHK_BOOL_EXEC(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ exec_expr; \ } \ } @@ -135,7 +135,7 @@ // If expr is not true, print the log and execute a custom statement #define GE_CHK_BOOL_EXEC_WARN(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ GELOGW(__VA_ARGS__); \ exec_expr; \ @@ -144,7 +144,7 @@ // If expr is not true, print the log and execute a custom statement #define GE_CHK_BOOL_EXEC_INFO(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ GELOGI(__VA_ARGS__); \ exec_expr; \ @@ -154,7 +154,7 @@ // If expr is not true, print the log and execute a custom statement #define GE_CHK_BOOL_TRUE_EXEC_INFO(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (b) { \ GELOGI(__VA_ARGS__); \ exec_expr; \ @@ -164,16 +164,16 @@ // If expr is true, print logs and execute custom statements #define GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (b) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ exec_expr; \ } \ } // If expr is true, print the Information log and execute a custom statement #define GE_CHK_TRUE_EXEC_INFO(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (b) { \ GELOGI(__VA_ARGS__); \ exec_expr; \ @@ -183,9 +183,9 @@ // If expr is not SUCCESS, print the log and execute the expression + return #define GE_CHK_BOOL_TRUE_RET_VOID(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (b) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ exec_expr; \ return; \ } \ @@ -194,10 +194,10 @@ // If expr is not SUCCESS, print the log and execute the expression + return _status #define GE_CHK_BOOL_TRUE_EXEC_RET_STATUS(expr, _status, exec_expr, ...) 
\ { \ - bool b = (expr); \ + const bool b = (expr); \ if (b) { \ REPORT_INNER_ERROR("E19999", __VA_ARGS__); \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ exec_expr; \ return _status; \ } \ @@ -206,7 +206,7 @@ // If expr is not true, execute a custom statement #define GE_CHK_BOOL_EXEC_NOLOG(expr, exec_expr) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ exec_expr; \ } \ @@ -214,34 +214,34 @@ // -----------------runtime related macro definitions------------------------------- // If expr is not RT_ERROR_NONE, print the log -#define GE_CHK_RT(expr) \ - do { \ - rtError_t _rt_ret = (expr); \ - if (_rt_ret != RT_ERROR_NONE) { \ - DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ - } \ - } while (0); +#define GE_CHK_RT(expr) \ + do { \ + const rtError_t _rt_ret = (expr); \ + if (_rt_ret != RT_ERROR_NONE) { \ + GELOGE(ge::FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ + } \ + } while (false) // If expr is not RT_ERROR_NONE, print the log and execute the exec_expr expression -#define GE_CHK_RT_EXEC(expr, exec_expr) \ - { \ - rtError_t _rt_ret = (expr); \ - if (_rt_ret != RT_ERROR_NONE) { \ - DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ - exec_expr; \ - } \ - } +#define GE_CHK_RT_EXEC(expr, exec_expr) \ + do { \ + const rtError_t _rt_ret = (expr); \ + if (_rt_ret != RT_ERROR_NONE) { \ + GELOGE(ge::FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ + exec_expr; \ + } \ + } while (false) // If expr is not RT_ERROR_NONE, print the log and return #define GE_CHK_RT_RET(expr) \ do { \ - rtError_t _rt_ret = (expr); \ + const rtError_t _rt_ret = (expr); \ if (_rt_ret != RT_ERROR_NONE) { \ REPORT_CALL_ERROR("E19999", "Call %s fail, ret: 0x%X", #expr, _rt_ret); \ - DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ + GELOGE(ge::FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ return RT_ERROR_TO_GE_STATUS(_rt_ret); \ } \ - } while (0); + } while (false) // If expr is true, execute exec_expr without printing logs #define GE_IF_BOOL_EXEC(expr, exec_expr) \ @@ -256,7 +256,7 @@ try { \ exec_expr0; \ } catch (const std::bad_alloc &) { \ - DOMI_LOGE("Make shared failed"); \ + GELOGE(ge::FAILED, "Make shared failed"); \ exec_expr1; \ } @@ -274,13 +274,13 @@ #define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \ do { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ GELOGE(_status, "%s", errormsg); \ ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {errormsg}); \ return _status; \ } \ - } while (0) + } while (false) template GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) { diff --git a/inc/framework/common/ge_inner_error_codes.h b/inc/framework/common/ge_inner_error_codes.h index 3697a526..0ab9721e 100644 --- a/inc/framework/common/ge_inner_error_codes.h +++ b/inc/framework/common/ge_inner_error_codes.h @@ -61,29 +61,29 @@ enum ErrorLevel { CRITICAL_LEVEL = 0b100, }; -// Each module defines error codes using the following macros +// Each module defines error codes using the following macros, name can not be modified to (name) #define GE_ERRORNO_COMMON(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, COMMON_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, COMMON_MODULE, name, (value), (desc)) #define GE_ERRORNO_CLIENT(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, CLIENT_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, CLIENT_MODULE, name, (value), (desc)) #define 
GE_ERRORNO_INIT(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, INIT_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, INIT_MODULE, name, (value), (desc)) #define GE_ERRORNO_SESSION(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, SESSION_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, SESSION_MODULE, name, (value), (desc)) #define GE_ERRORNO_GRAPH(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GRAPH_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GRAPH_MODULE, name, (value), (desc)) #define GE_ERRORNO_ENGINE(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, ENGINE_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, ENGINE_MODULE, name, (value), (desc)) #define GE_ERRORNO_OPS(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, OPS_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, OPS_MODULE, name, (value), (desc)) #define GE_ERRORNO_PLUGIN(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, PLUGIN_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, PLUGIN_MODULE, name, (value), (desc)) #define GE_ERRORNO_RUNTIME(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, RUNTIME_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, RUNTIME_MODULE, name, (value), (desc)) #define GE_ERRORNO_EXECUTOR(name, value, desc) \ - GE_ERRORNO(RT_DEVICE, ERROR_CODE, COMMON_LEVEL, SYSID_GE, EXECUTOR_MODULE, name, value, desc) + GE_ERRORNO(RT_DEVICE, ERROR_CODE, COMMON_LEVEL, SYSID_GE, EXECUTOR_MODULE, name, (value), (desc)) #define GE_ERRORNO_GENERATOR(name, value, desc) \ - GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GENERATOR_MODULE, name, value, desc) + GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GENERATOR_MODULE, name, (value), (desc)) // Get error code description #define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value) @@ -125,13 +125,13 @@ GE_ERRORNO_CLIENT(GE_CLI_GE_ALREADY_INITIALIZED, 10, "GE is already initialized. 
GE_ERRORNO_CLIENT(GE_CLI_GE_NOT_INITIALIZED, 11, "GE is not yet initialized or is finalized."); // 1343229963 // Init module error code definition -GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048 -GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049 -GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050 -GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported."); // 1343234051 -GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profing initializations have not been done."); // 1343234052 +GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048 +GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049 +GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050 +GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported."); // 1343234051 +GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profing initializations have not been done."); // 1343234052 GE_ERRORNO_INIT(GE_PROF_MODE_CONFLICT, 5, - "Profiling command mode which is preferred is running, the api mode will not work."); // 1343234053 + "Profiling command mode which is preferred is running, the api mode will not work."); // 1343234053 // Session module error code definition GE_ERRORNO_SESSION(GE_SESS_INIT_FAILED, 0, "Failed to initialize session."); // 1343238144 @@ -216,8 +216,8 @@ GE_ERRORNO_ENGINE(GE_ENG_FINALIZE_FAILED, 1, "Engine finalize failed."); GE_ERRORNO_ENGINE(GE_ENG_MEMTYPE_ERROR, 2, "Memory type HBM is necessary when engine is in device"); // 1343246338 // Optimize errocode -GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted."); // 1343242303 -GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed."); // 1343242304 +GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted."); // 1343242303 +GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed."); // 1343242304 // Ops module error code definition GE_ERRORNO_OPS(GE_OPS_KERNEL_STORE_INIT_FAILED, 0, "Failed to initialize OpsKernelInfoStore."); // 1343250432 @@ -313,7 +313,7 @@ GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, 3, "Graph ma GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed."); GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed."); -#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast(RT_ERROR) +#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast(RT_ERROR) } // namespace ge #endif // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 83d01c1f..066327b0 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -279,10 +279,19 @@ struct TaskDescInfo { }; struct OpDescInfo { - std::string op_name; - std::string op_type; - uint32_t task_id; - uint32_t stream_id; + std::string op_name = ""; + std::string op_type = ""; + uint32_t task_id = 0; + uint32_t stream_id = 0; + uint32_t imply_type = 0; + uint32_t block_dim = 0; + std::string op_file_path = ""; + std::string dev_func = ""; + std::string tvm_magic = ""; + uint32_t tiling_key = 0; + std::string tiling_data = ""; + std::string node_info = ""; + std::vector workspace_bytes; std::vector input_format; std::vector> 
input_shape; std::vector input_data_type; diff --git a/inc/framework/common/op/ge_op_utils.h b/inc/framework/common/op/ge_op_utils.h index bc965d13..be677407 100644 --- a/inc/framework/common/op/ge_op_utils.h +++ b/inc/framework/common/op/ge_op_utils.h @@ -95,7 +95,7 @@ class GE_FUNC_VISIBILITY OpUtils { /// @param [out] aipp_params aipp parameters /// @return enum of tagCCAippInputFormat /// - static Status ConvertAippParams(const GeAttrValue::NamedAttrs &aipp_attr, domi::AippOpParams *aipp_params); + static Status ConvertAippParams(const NamedAttrs &aipp_attr, domi::AippOpParams *aipp_params); static Status TransferDim(const std::vector &dim, std::vector &dim_vector); template static void SliceData(const std::vector &input, int64_t chunk_size, std::vector &output, diff --git a/inc/framework/common/string_util.h b/inc/framework/common/string_util.h index 677b1971..21ee4670 100644 --- a/inc/framework/common/string_util.h +++ b/inc/framework/common/string_util.h @@ -78,8 +78,8 @@ class GE_FUNC_VISIBILITY StringUtils { /// @param [in] delim separator /// @return string array after segmentation /// - static std::vector Split(const std::string &str, char delim) { - std::vector elems; + static std::vector> Split(const std::string &str, char delim) { + std::vector> elems; if (str.empty()) { elems.emplace_back(""); diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index 811d5eed..1a4f7cde 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -339,6 +339,8 @@ REGISTER_OPTYPE_DECLARE(PLACEHOLDER, "PlaceHolder"); REGISTER_OPTYPE_DECLARE(END, "End"); REGISTER_OPTYPE_DECLARE(BASICLSTMCELL, "BasicLSTMCell"); REGISTER_OPTYPE_DECLARE(GETNEXT, "GetNext"); +REGISTER_OPTYPE_DECLARE(ITERATOR, "Iterator"); +REGISTER_OPTYPE_DECLARE(ITERATORV2, "IteratorV2"); REGISTER_OPTYPE_DECLARE(INITDATA, "InitData"); REGISTER_OPTYPE_DECLARE(TRANSSHAPE, "TransShape") REGISTER_OPTYPE_DECLARE(REFIDENTITY, "RefIdentity"); diff --git a/inc/framework/common/util.h b/inc/framework/common/util.h index a3989b9d..97528eb6 100644 --- a/inc/framework/common/util.h +++ b/inc/framework/common/util.h @@ -18,8 +18,8 @@ #define INC_FRAMEWORK_COMMON_UTIL_H_ #include -#include -#include +#include +#include #include #include #include @@ -30,17 +30,17 @@ #include "framework/common/ge_inner_error_codes.h" #include "mmpa/mmpa_api.h" -#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ - do { \ - if (size <= 0) { \ - DOMI_LOGE("param[%s] is not a positive number", #size); \ - return PARAM_INVALID; \ - } \ - } while (0) +#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ + do { \ + if (size <= 0) { \ + GELOGE(ge::FAILED, "param[%s] is not a positive number", #size); \ + return PARAM_INVALID; \ + } \ + } while (false) #define CHECK_FALSE_EXEC(expr, exec_expr, ...) \ { \ - bool b = (expr); \ + const bool b = (expr); \ if (!b) { \ exec_expr; \ } \ @@ -59,131 +59,133 @@ }); // For propagating errors when calling a function. -#define GE_RETURN_IF_ERROR(expr) \ - do { \ - const ::ge::Status _status = (expr); \ - if (_status) return _status; \ - } while (0) +#define GE_RETURN_IF_ERROR(expr) \ + do { \ + const ge::Status _chk_status = (expr); \ + if (_chk_status != ge::SUCCESS) { \ + return _chk_status; \ + } \ + } while (false) #define GE_RETURN_WITH_LOG_IF_ERROR(expr, ...) 
\ do { \ - const ::ge::Status _status = (expr); \ - if (_status) { \ - DOMI_LOGE(__VA_ARGS__); \ - return _status; \ + const ge::Status _chk_status = (expr); \ + if (_chk_status != ge::SUCCESS) { \ + GELOGE(ge::FAILED, __VA_ARGS__); \ + return _chk_status; \ } \ - } while (0) + } while (false) // check whether the parameter is true. If it is, return FAILED and record the error log #define GE_RETURN_WITH_LOG_IF_TRUE(condition, ...) \ do { \ if (condition) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ return ge::FAILED; \ } \ - } while (0) + } while (false) // Check if the parameter is false. If yes, return FAILED and record the error log #define GE_RETURN_WITH_LOG_IF_FALSE(condition, ...) \ do { \ - bool _condition = (condition); \ + const bool _condition = (condition); \ if (!_condition) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ return ge::FAILED; \ } \ - } while (0) + } while (false) // Checks whether the parameter is true. If so, returns PARAM_INVALID and records the error log #define GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(condition, ...) \ do { \ if (condition) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ return ge::PARAM_INVALID; \ } \ - } while (0) + } while (false) // Check if the parameter is false. If yes, return PARAM_INVALID and record the error log #define GE_RT_PARAM_INVALID_WITH_LOG_IF_FALSE(condition, ...) \ do { \ - bool _condition = (condition); \ + const bool _condition = (condition); \ if (!_condition) { \ - DOMI_LOGE(__VA_ARGS__); \ + GELOGE(ge::FAILED, __VA_ARGS__); \ return ge::PARAM_INVALID; \ } \ - } while (0) + } while (false) // Check if the parameter is null. If yes, return PARAM_INVALID and record the error #define GE_CHECK_NOTNULL(val) \ do { \ if (val == nullptr) { \ REPORT_INNER_ERROR("E19999", "Param:%s is nullptr, check invalid", #val); \ - DOMI_LOGE("[Check][Param:%s]null is invalid.", #val); \ + GELOGE(ge::FAILED, "[Check][Param:%s]null is invalid.", #val); \ return ge::PARAM_INVALID; \ } \ - } while (0) + } while (false) // Check if the parameter is null. If yes, just return and record the error -#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return; \ - } \ - } while (0) +#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ + do { \ + if (val == nullptr) { \ + GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ + return; \ + } \ + } while (false) // Check whether the parameter is null. If so, execute the exec_expr expression and record the error log -#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - exec_expr; \ - } \ - } while (0) +#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ + do { \ + if (val == nullptr) { \ + GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ + exec_expr; \ + } \ + } while (false) // Check whether the parameter is null. If yes, return directly and record the error log -#define GE_RT_VOID_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return; \ - } \ - } while (0) +#define GE_RT_VOID_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ + return; \ + } \ + } while (false) // Check if the parameter is null. 
If yes, return false and record the error log -#define GE_RT_FALSE_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return false; \ - } \ - } while (0) +#define GE_RT_FALSE_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ + return false; \ + } \ + } while (false) // Check if the parameter is out of bounds -#define GE_CHECK_SIZE(size) \ - do { \ - if (size == 0) { \ - DOMI_LOGE("param[%s] is out of range", #size); \ - return ge::PARAM_INVALID; \ - } \ - } while (0) +#define GE_CHECK_SIZE(size) \ + do { \ + if (size == 0) { \ + GELOGE(ge::FAILED, "param[%s] is out of range", #size); \ + return ge::PARAM_INVALID; \ + } \ + } while (false) // Check if the value on the left is greater than or equal to the value on the right -#define GE_CHECK_GE(lhs, rhs) \ - do { \ - if (lhs < rhs) { \ - DOMI_LOGE("param[%s] is less than[%s]", #lhs, #rhs); \ - return ge::PARAM_INVALID; \ - } \ - } while (0) +#define GE_CHECK_GE(lhs, rhs) \ + do { \ + if (lhs < rhs) { \ + GELOGE(ge::FAILED, "param[%s] is less than[%s]", #lhs, #rhs); \ + return ge::PARAM_INVALID; \ + } \ + } while (false) // Check if the value on the left is less than or equal to the value on the right -#define GE_CHECK_LE(lhs, rhs) \ - do { \ - if (lhs > rhs) { \ - DOMI_LOGE("param[%s] is greater than[%s]", #lhs, #rhs); \ - return ge::PARAM_INVALID; \ - } \ - } while (0) +#define GE_CHECK_LE(lhs, rhs) \ + do { \ + if (lhs > rhs) { \ + GELOGE(ge::FAILED, "param[%s] is greater than[%s]", #lhs, #rhs); \ + return ge::PARAM_INVALID; \ + } \ + } while (false) #define GE_DELETE_NEW_SINGLE(var) \ do { \ @@ -191,7 +193,7 @@ delete var; \ var = nullptr; \ } \ - } while (0) + } while (false) #define GE_DELETE_NEW_ARRAY(var) \ do { \ @@ -199,18 +201,18 @@ delete[] var; \ var = nullptr; \ } \ - } while (0) + } while (false) #define GE_FREE_RT_LOG(addr) \ do { \ if (addr != nullptr) { \ - rtError_t error = rtFree(addr); \ + const rtError_t error = rtFree(addr); \ if (error != RT_ERROR_NONE) { \ GELOGE(RT_FAILED, "Call rtFree failed, error: %#x", error); \ } \ addr = nullptr; \ } \ - } while (0) + } while (false) /** * @ingroup domi_common @@ -228,12 +230,6 @@ using google::protobuf::Message; /// /// @ingroup domi_common -/// @brief Maximum file path length -/// -const int32_t DOMI_MAX_PATH_LEN = 256; - -/// -/// @ingroup domi_common /// @brief Reads the proto structure from an array. /// @param [in] data proto data to be read /// @param [in] size proto data size @@ -253,8 +249,6 @@ GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *data, int size, Message * /// GE_FUNC_VISIBILITY bool ReadProtoFromText(const char *file, google::protobuf::Message *message); -GE_FUNC_VISIBILITY bool ReadProtoFromMem(const char *data, int size, google::protobuf::Message *message); - /// /// @ingroup: domi_common /// @brief: get length of file @@ -306,10 +300,10 @@ GE_FUNC_VISIBILITY std::string ToString(std::vector &v) { ss << x; ss << ", "; } - std::string strRet = - ss.str().substr(0, ss.str().length() - 2); // Delete the two extra characters at the end of the line. - strRet += "]"; - return strRet; + // Delete the two extra characters at the end of the line. 
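The ge_log.h / log.h / util.h hunks above mainly wrap every check macro in do { ... } while (false) and route error output through GELOGE instead of DOMI_LOGE, so each macro expands to a single statement and call sites keep their trailing semicolons. A short sketch of how the reworked macros compose in a Status-returning helper (DoInit is a hypothetical callee, not part of the patch):

#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"

namespace ge {
Status DoInit(void *buffer, int64_t size);    // hypothetical helper

Status InitBuffer(void *buffer, int64_t size) {
  GE_CHECK_NOTNULL(buffer);                   // nullptr -> report error, return PARAM_INVALID
  GE_CHECK_POSITIVE_SIZE_RANGE(size);         // size <= 0 -> return PARAM_INVALID
  GE_CHK_STATUS_RET(DoInit(buffer, size), "DoInit failed, size=%ld", size);
  GELOGI("buffer initialized, size=%ld", size);
  return SUCCESS;
}
}  // namespace ge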
+ std::string str = ss.str().substr(0u, ss.str().length() - 2u); + str += "]"; + return str; } /// @@ -326,10 +320,10 @@ GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedField ss << x; ss << ", "; } - std::string strRet = - ss.str().substr(0, ss.str().length() - 2); // Delete the two extra characters at the end of the line. - strRet += "]"; - return strRet; + // Delete the two extra characters at the end of the line. + std::string str = ss.str().substr(0u, ss.str().length() - 2u); + str += "]"; + return str; } /// @@ -394,14 +388,6 @@ GE_FUNC_VISIBILITY bool ValidateStr(const std::string &filePath, const std::stri /// /// @ingroup domi_common -/// @brief Check whether the file is normal file. -/// @param [in] file_path file path -/// @param [out] result -/// -GE_FUNC_VISIBILITY bool IsValidFile(const char *file_path); - -/// -/// @ingroup domi_common /// @brief Check path invalid /// @param [in] path, path to be checked /// @param [in] length, length of path diff --git a/inc/framework/engine/dnnengine.h b/inc/framework/engine/dnnengine.h index 8a0f3b65..b5f02ebe 100644 --- a/inc/framework/engine/dnnengine.h +++ b/inc/framework/engine/dnnengine.h @@ -43,14 +43,31 @@ struct DNNEngineAttribute { // If engine input format must be specific, set this attribute, else set FORMAT_RESERVED Format engine_input_format; Format engine_output_format; + bool atomic_engine_flag; }; class GE_FUNC_VISIBILITY DNNEngine { public: + DNNEngine() = default; + explicit DNNEngine(const DNNEngineAttribute &attrs) { + engine_attribute_ = attrs; + } virtual ~DNNEngine() = default; - virtual Status Initialize(const std::map &options) = 0; - virtual Status Finalize() = 0; - virtual void GetAttributes(DNNEngineAttribute &attr) const = 0; + Status Initialize(const std::map &options) { + return SUCCESS; + } + Status Finalize() { + return SUCCESS; + } + void GetAttributes(DNNEngineAttribute &attr) const { + attr = engine_attribute_; + } + bool IsAtomic() const { + return engine_attribute_.atomic_engine_flag; + } + + protected: + DNNEngineAttribute engine_attribute_; }; } // namespace ge diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h index 5da5a593..86496012 100644 --- a/inc/framework/generator/ge_generator.h +++ b/inc/framework/generator/ge_generator.h @@ -34,13 +34,16 @@ namespace ge { class GeRootModel; class GE_FUNC_VISIBILITY GeGenerator { public: + using InOutTensorRef = std::pair &, const vector &>; static GeGenerator &GetInstance() { static GeGenerator Instance; return Instance; } GeGenerator() = default; - ~GeGenerator() { (void)Finalize(); } + ~GeGenerator() { + (void)Finalize(); + } GeGenerator(const GeGenerator &) = delete; @@ -94,8 +97,8 @@ class GE_FUNC_VISIBILITY GeGenerator { /// @param [in] graph_name: graph name. /// @param [out] graph: graph of single op. 
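dnnengine.h above turns DNNEngine from a pure-virtual interface into a concrete base class that stores its DNNEngineAttribute, now including the atomic_engine_flag added by this patch. A brief sketch of the resulting usage, assuming the remaining attribute fields can stay default-initialized:

#include "framework/engine/dnnengine.h"

namespace ge {
void DescribeEngine() {
  DNNEngineAttribute attrs{};                    // unspecified fields keep their defaults
  attrs.engine_input_format = FORMAT_RESERVED;   // no specific input format required
  attrs.engine_output_format = FORMAT_RESERVED;
  attrs.atomic_engine_flag = true;               // new flag introduced by this patch

  DNNEngine engine(attrs);                       // explicit ctor stores engine_attribute_
  DNNEngineAttribute queried;
  engine.GetAttributes(queried);                 // copies engine_attribute_ back out
  const bool atomic = engine.IsAtomic();         // reads atomic_engine_flag
  (void)atomic;
}
}  // namespace ge

Engines that previously overrode Initialize/Finalize/GetAttributes now either construct the base with their attributes or fill the protected engine_attribute_ member.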
/// @return SUCCESS or FAILED - Status BuildSingleOpGraph(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, - std::string graph_name, Graph &graph); + Status BuildSingleOpGraph(OpDescPtr &op_desc, const InOutTensorRef &inputs_outputs, std::string graph_name, + Graph &graph, std::vector> &inputs_name_type); private: Status GenerateModel(const Graph &graph, const string &file_name_prefix, const vector &inputs, @@ -110,6 +113,10 @@ class GE_FUNC_VISIBILITY GeGenerator { using GeRootModelPtr = std::shared_ptr; Status SetModelNameForDump(const GeRootModelPtr &ge_root_model); + Status CreateGeneralizedBuildAttrs(const GeRootModelPtr &ge_root_model, const std::vector &inputs, + const std::vector &outputs, + const std::vector> &inputs_name_type, + std::vector &generalized_build_attrs); class Impl; diff --git a/inc/framework/omg/version.h b/inc/framework/omg/version.h index 4facba0d..a1be09ed 100644 --- a/inc/framework/omg/version.h +++ b/inc/framework/omg/version.h @@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY PlatformVersionManager { ~PlatformVersionManager() = delete; static Status GetPlatformVersion(std::string &ver) { ver = "1.11.z"; - std::vector version_splits = StringUtils::Split(ver, '.'); + const std::vector version_splits = StringUtils::Split(ver, '.'); GE_IF_BOOL_EXEC(version_splits.size() < 3, GELOGW("Read platform version error!"); return FAILED;); GELOGI("Read current platform version: %s.", ver.c_str()); diff --git a/metadef b/metadef index 60df4b39..b21fe2bc 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 60df4b39a6f639c21dd7deb220b93345451938f5 +Subproject commit b21fe2bccb97e64fa2c7dff8ffd559adc853e73d diff --git a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h index c5423d36..a2d805fb 100644 --- a/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h +++ b/third_party/fwkacllib/inc/external/runtime/rt_error_codes.h @@ -1,109 +1,110 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ -#define __INC_EXTERNEL_RT_ERROR_CODES_H__ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -static const int32_t ACL_RT_SUCCESS = 0; // success - -static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid -static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id -static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null -static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context -static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context -static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal -static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned -static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed -static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed -static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream -static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread -static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set -static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create -static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream -static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type -static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle -static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type -static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout - -static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support -static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error -static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error -static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow -static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device -static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail -static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission -static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource -static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource -static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource -static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource -static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource - -static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error -static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error -static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream -static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream -static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete -static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence -static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete -static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; 
// context release error -static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error -static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support -static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat -static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed -static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout -static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error -static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout -static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception -static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception -static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout -static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception -static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error -static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error -static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error -static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error -static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal -static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering -static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init -static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data -static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error -static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate -static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed -static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed -static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context -static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out -static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error -static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout -static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception -static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception -static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal - -static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error -static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error -static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect - -#ifdef __cplusplus -} -#endif - -#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ +/** +* @file rt_error_codes.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ + +#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ +#define __INC_EXTERNEL_RT_ERROR_CODES_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static const int32_t ACL_RT_SUCCESS = 0; // success + +static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid +static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id +static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null +static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context +static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context +static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal +static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned +static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed +static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed +static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream +static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread +static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set +static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create +static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream +static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type +static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle +static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type +static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout + +static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support +static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error +static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error +static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow +static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device +static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail +static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission +static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource +static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource +static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource +static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource +static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource +static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit +static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty +static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full + +static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error +static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error +static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream +static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream +static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete +static const 
int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence +static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete +static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error +static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error +static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support +static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat +static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed +static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout +static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error +static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout +static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception +static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception +static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout +static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception +static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error +static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error +static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error +static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error +static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal +static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering +static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init +static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data +static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error +static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate +static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed +static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed +static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context +static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out +static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout +static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception +static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception +static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal +static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode +static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die +static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id +static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set + +static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error +static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error +static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect + +#ifdef __cplusplus +} +#endif + +#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ diff --git 
a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index 4b45f4cf..e780ce1b 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -1258,21 +1258,21 @@ REG_OP(ExpandD) * Three inputs, including: * @li bucket_list: A 1-D tensor of type int32 with the value of ivf_counts and ivf_offset index. \n * @li ivf_counts: A 1-D tensor of type int32 with the value of ivf counts. \n -* @li ivf_offset: A 1-D tensor of type int32 with the value of ivf offset. \n +* @li ivf_offset: A 1-D tensor of type int32 or int64 with the value of ivf offset. \n * @par Attributes: * total_limit: A int64 type maximum value of the sum of ivf_counts corresponding to bucket_list. \n * @par Outputs: * @li buckets_limit: A 1-D tensor of type int32 with the sum <= total_limit. \n -* @li buckets_offset: A 1-D tensor of type int32 with the value of ivf_offset corresponding to bucket_list. \n +* @li buckets_offset: A 1-D tensor of type int32 or int64 with the value of ivf_offset corresponding to bucket_list. \n */ REG_OP(CalcBucketsLimitAndOffset) .INPUT(bucket_list, TensorType({DT_INT32})) .INPUT(ivf_counts, TensorType({DT_INT32})) - .INPUT(ivf_offset, TensorType({DT_INT32})) + .INPUT(ivf_offset, TensorType({DT_INT32, DT_INT64})) .OUTPUT(buckets_limit, TensorType({DT_INT32})) - .OUTPUT(buckets_offset, TensorType({DT_INT32})) + .OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64})) .REQUIRED_ATTR(total_limit, Int) .OP_END_FACTORY_REG(CalcBucketsLimitAndOffset) diff --git a/third_party/fwkacllib/inc/ops/cluster.h b/third_party/fwkacllib/inc/ops/cluster.h new file mode 100644 index 00000000..19b4ea05 --- /dev/null +++ b/third_party/fwkacllib/inc/ops/cluster.h @@ -0,0 +1,58 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file cluster.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_ + +#include "graph/operator_reg.h" +#include "graph/operator.h" + +namespace ge { +/** +* @brief Perform k-means clustering on a data matrix. \n + +* @par Inputs: +* Three required inputs and one optional inputs, including: \n +* @li x: A 2D tensor of data type float32. \n +* @li y: A 2D tensor of data type float32. \n +* @li sum_square_x: An optional 2D tensor of data type float32. \n +* @li sum_square_y: A 2D tensor of data type float32. \n + +* @par Attributes: +* use_actual_distance: Indicates whether to calculate the complete distance. \n + +* @par Outputs: +* @li segment_sum: A tensor of data type float32. \n +* @li segment_count: A tensor of data type float32. \n +* @li k_mean_total_sum: A tensor of data type float32. 
\n +*/ +REG_OP(KMeansCentroids) + .INPUT(x, TensorType({DT_FLOAT})) + .INPUT(y, TensorType({DT_FLOAT})) + .INPUT(sum_square_y, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(sum_square_x, TensorType({DT_FLOAT})) + .OUTPUT(segment_sum, TensorType({DT_FLOAT})) + .OUTPUT(segment_count, TensorType({DT_FLOAT})) + .OUTPUT(kmean_total_sum, TensorType({DT_FLOAT})) + .ATTR(use_actual_distance, Bool, false) + .OP_END_FACTORY_REG(KMeansCentroids) +} // namespace ge + +#endif // OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_ diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index bcf50058..cd41d6fa 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -3391,57 +3391,57 @@ REG_OP(TensorRedirect) .OP_END_FACTORY_REG(TensorRedirect) /** -* @brief Performs the element-wise division of tensor x2 by tensor x3, -* multiply the result by the scalar value and add it to tensor x1 +* @brief Performs the element-wise division of tensor x1 by tensor x2, +* multiply the result by the scalar value and add it to tensor input_data. * @par Inputs: * Four inputs, including: * @li input_data: A mutable input Tensor. Must be one of the following types: -* float16, float32. -* @li x1: A mutable input Tensor of the same type as x1. -* @li x2: A mutable input Tensor of the same type as x1. +* float16, float32, double, int64. +* @li x1: A mutable input Tensor of the same type as input_data. +* @li x2: A mutable input Tensor of the same type as input_data. * @li value: A mutable input Tensor. Must be one of the following types: -* float16, float32, int32. \n +* float16, float32, double, int64, int32. \n + * @par Outputs: -* y: A mutable Tensor. Has the same type as "x1". \n +* y: A mutable Tensor. Has the same type as input_data. \n * @par Third-party framework compatibility -* Compatible with the Pytorch operator Addcdiv. +* Compatible with the Pytorch operator Addcdiv(version-1.5.0). */ REG_OP(Addcdiv) - .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64})) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64})) + .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32, DT_DOUBLE, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64})) .OP_END_FACTORY_REG(Addcdiv) /** -* @brief Performs the element-wise multiplication of tensor x2 by tensor x3, +* @brief Performs the element-wise multiplication of tensor x1 by tensor x2, * multiply the result by the scalar value and add it to tensor input_data - * @par Inputs: * Four inputs, including: * @li input_data: A mutable input Tensor. Must be one of the following types: -* float16, float32, int8, int32, uint8. -* @li x1: A mutable input Tensor of the same type as x1. -* @li x2: A mutable input Tensor of the same type as x1. -* @li value: A tensor which includes only one element of the same type as x1. \n +* float16, float32, double, int64, int8, int32, uint8. +* @li x1: A mutable input Tensor of the same type as input_data. +* @li x2: A mutable input Tensor of the same type as input_data. 
+* @li value: A tensor which includes only one element of the same type as input_data. \n * @par Outputs: -* y: A mutable output Tensor. Has the same type as "x1". \n +* y: A mutable output Tensor. Has the same type as input_data. \n * @par Third-party framework compatibility * Compatible with the Pytorch operator Addcmul. */ REG_OP(Addcmul) - .INPUT(input_data, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) - .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) - .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) - .INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) - .OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) + .INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) + .INPUT(value, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) .OP_END_FACTORY_REG(Addcmul) /** @@ -3508,8 +3508,8 @@ REG_OP(StrideAdd) * Compatible with the Pytorch equal operator. \n */ REG_OP(TensorEqual) - .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) - .INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_INT8, DT_UINT8})) + .INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_INT8, DT_UINT8})) .OUTPUT(output_z, TensorType({DT_BOOL})) .OP_END_FACTORY_REG(TensorEqual) diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h index f6cc8694..5e31bebd 100644 --- a/third_party/fwkacllib/inc/ops/linalg_ops.h +++ b/third_party/fwkacllib/inc/ops/linalg_ops.h @@ -410,10 +410,10 @@ form square matrices. \n */ REG_OP(Svd) - .INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT })) - .OUTPUT(sigma, TensorType({ DT_DOUBLE, DT_FLOAT })) - .OUTPUT(u, TensorType({ DT_DOUBLE, DT_FLOAT })) - .OUTPUT(v, TensorType({ DT_DOUBLE, DT_FLOAT })) + .INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(sigma, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(u, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) + .OUTPUT(v, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) .ATTR(compute_uv, Bool, true) .ATTR(full_matrices, Bool, false) .OP_END_FACTORY_REG(Svd) diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 6eb418d8..2c5afbe7 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -939,6 +939,57 @@ REG_OP(LpNorm) .OP_END_FACTORY_REG(LpNorm) /** +* @brief Computes LpNormReduce. + +* @par Inputs: +* x: An ND tensor of type float16, float32. \n +* +* @par Attributes: +* @li p: Int, "inf" or "-inf", default value is 2. +* @li axes: ListInt, {} means all axes will be computed. +* @li keepdim: Bool, default is false. +* @li epsilon: Float, default is 1e-12. \n + +* @par Outputs: +* y: An ND tensor of type float16, float32. The shape of y is depending +* on axes and keepdim. 
\n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator LpNormReduce. +*/ +REG_OP(LpNormReduce) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(p, Int, 2) + .ATTR(axes, ListInt, {}) + .ATTR(keepdim, Bool, false) + .ATTR(epsilon, Float, 1e-12) + .OP_END_FACTORY_REG(LpNormReduce) + +/** +* @brief Computes LpNormUpdate. + +* @par Inputs: +* x: An ND tensor of type float16, float32. \n +* +* @par Attributes: +* @li p: Int, "inf" or "-inf", default value is 2. +* @li epsilon: Float, default is 1e-12. \n + +* @par Outputs: +* y: An ND tensor of type float16, float32. \n + +* @par Third-party framework compatibility +* Compatible with the Pytorch operator LpNormUpdate. +*/ +REG_OP(LpNormUpdate) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(p, Int, 2) + .ATTR(epsilon, Float, 1e-12) + .OP_END_FACTORY_REG(LpNormUpdate) + +/** * @brief get complex. * @par Inputs: diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index e82251bb..55199962 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -49,10 +49,10 @@ namespace ge { * Compatible with the TensorFlow operator BatchMatmul. */ REG_OP(MatMul) - .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) .ATTR(transpose_x1, Bool, false) .ATTR(transpose_x2, Bool, false) .OP_END_FACTORY_REG(MatMul) @@ -88,10 +88,10 @@ REG_OP(MatMul) * Compatible with the TensorFlow operator BatchMatmul. */ REG_OP(MatMulV2) - .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4})) - .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8 DT_INT4})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) .ATTR(transpose_x1, Bool, false) .ATTR(transpose_x2, Bool, false) @@ -1067,6 +1067,40 @@ REG_OP(MatrixSetDiagV2) .OP_END_FACTORY_REG(MatrixSetDiagV2) /** +*@brief Returns a batched matrix tensor with new batched diagonal values . \n + +*@par Inputs: +* Three inputs, including: +*@li input: "Rank `r+1`, where `r >= 1`. \n + +*@li diagonal: Rank `r` when `k` is an integer or `k[0] == k[1]`. Otherwise, it has rank `r+1`. \n + +*@li k: +*Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n +*diagonal, and negative value means subdiagonals. `k` can be a single integer \n +*(for a single diagonal) or a pair of integers specifying the low and high ends \n +*of a matrix band. `k[0]` must not be larger than `k[1]`. 
\n + +*@par Attributes: +*@li align: An optional string. Defaults to RIGHT_LEFT. It is a string specifying \n +*how superdiagonals and subdiagonals should be aligned, respectively. \n +*other optional: LEFT_RIGHT, LEFT_LEFT, and RIGHT_RIGHT.\n + +*@par Outputs: +*output: Rank `r+1`, with `output.shape = input.shape` . \n + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterUpdate. +*/ +REG_OP(MatrixSetDiagV3) + .INPUT(input, TensorType::BasicType()) + .INPUT(diagonal, TensorType::BasicType()) + .INPUT(k, TensorType({DT_INT32})) + .OUTPUT(output, TensorType::BasicType()) + .ATTR(align, String, "RIGHT_LEFT") + .OP_END_FACTORY_REG(MatrixSetDiagV3) + +/** *@brief Returns a batched diagonal tensor with given batched diagonal values . \n *@par Inputs: diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index ed7cb9b5..b0cb15fb 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -369,16 +369,14 @@ REG_OP(BiasAddGrad) *\n * The following are the supported data types and data formats:\n *\n - | Tensor | out_bckprop | filter | y\n - ------------|-------------|---------|--------\n - | Data Type | float16 | float16 | float16\n - | |-------------|---------|--------\n - | | float32 | float32 | float32\n - | |-------------|---------|--------\n - | | float64 | float64 | float64\n - ------------|-------------|---------|--------\n - | Format | NCHW | NCHW | NCHW\n - | | NHWC | HWCN | NHWC\n + *\n + | Tensor | out_bckprop | filter | y |\n + |-----------|-------------|---------|--------|\n + | Data Type | float16 | float16 | float16|\n + | | float32 | float32 | float32|\n + | | float64 | float64 | float64|\n + | Format | NCHW | NCHW | NCHW |\n + | | NHWC | HWCN | NHWC |\n *\n * For float32 and float64 type, the actual calculation on the chip is based on * float16. 
@@ -400,30 +398,25 @@ REG_OP(BiasAddGrad) *\n * The following value range restrictions must be met:\n *\n - | Name | Field | Scope\n - -------------------|----------|--------------\n - | input_size | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Filter | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | out_backprop | H*strideH| [1, 200000]\n - | | W*strideW| [1, 4096]\n - -------------------|----------|--------------\n - | y(fmap) | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Stride | H | [1, 63]\n - | | W | [1, 63]\n - -------------------|----------|--------------\n - | Padding | Top | [0, 255]\n - | | Bottom | [0, 255]\n - | | Left | [0, 255]\n - | | Right | [0, 255]\n - -------------------|----------|--------------\n - | Dilation | H | [1, 255]\n - | | W | [1, 255]\n + *\n + | Name | Field | Scope |\n + |------------------|----------|--------------|\n + | input_size | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Filter | H | [1, 255] |\n + | | W | [1, 255] |\n + | out_backprop | H*strideH| [1, 200000] |\n + | | W*strideW| [1, 4096] |\n + | y(fmap) | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Stride | H | [1, 63] |\n + | | W | [1, 63] |\n + | Padding | Top | [0, 255] |\n + | | Bottom | [0, 255] |\n + | | Left | [0, 255] |\n + | | Right | [0, 255] |\n + | Dilation | H | [1, 255] |\n + | | W | [1, 255] |\n *\n * In Ascend910, fmap or out_backprop's H and W not support 1 when @@ -495,9 +488,9 @@ REG_OP(Conv2DBackpropInput) * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv2DBackpropInput instead. */ REG_OP(Conv2DBackpropInputD) - .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) - .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8, DT_BF16})) + .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_INT8, DT_BF16})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32, DT_BF16})) .REQUIRED_ATTR(input_size, ListInt) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) @@ -523,13 +516,12 @@ REG_OP(Conv2DBackpropInputD) *\n * The following are the supported data types and data formats:\n *\n - | Tensor | x | filter | bias | y\n - ------------|---------|---------|---------|--------\n - | Data Type | float16 | float16 | float16 | float16\n - | |---------|---------|---------|--------\n - | | int8 | int8 | int32 | int32\n - ------------|---------|---------|---------|--------\n - | Format | NCHW | NCHW | ND | NCHW\n + *\n + | Tensor | x | filter | bias | y |\n + |-----------|---------|---------|---------|--------|\n + | Data Type | float16 | float16 | float16 | float16|\n + | | int8 | int8 | int32 | int32 |\n + | Format | NCHW | NCHW | ND | NCHW |\n *\n * For int8, a dequant or requant operator must be followed. 
*\n @@ -553,29 +545,24 @@ REG_OP(Conv2DBackpropInputD) *\n * The following value range restrictions must be met:\n *\n - | Name | Field | Scope\n - -------------------|----------|--------------\n - | x (out_backprop) | H*strideH| [1, 200000]\n - | | W*strideW| [1, 4096]\n - -------------------|----------|--------------\n - | Filter | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | y (fmap) | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Stride | H | [1, 63]\n - | | W | [1, 63]\n - -------------------|----------|--------------\n - | Padding | Top | [0, 255]\n - | | Bottom | [0, 255]\n - | | Left | [0, 255]\n - | | Right | [0, 255]\n - -------------------|----------|--------------\n - | Dilation | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | Offset_x | | [-128, 127]\n + *\n + | Name | Field | Scope |\n + |------------------|----------|--------------|\n + | x (out_backprop) | H*strideH| [1, 200000] |\n + | | W*strideW| [1, 4096] |\n + | Filter | H | [1, 255] |\n + | | W | [1, 255] |\n + | y (fmap) | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Stride | H | [1, 63] |\n + | | W | [1, 63] |\n + | Padding | Top | [0, 255] |\n + | | Bottom | [0, 255] |\n + | | Left | [0, 255] |\n + | | Right | [0, 255] |\n + | Dilation | H | [1, 255] |\n + | | W | [1, 255] |\n + | Offset_x | | [-128, 127] |\n *\n * In Ascend910, fmap or out_backprop's H and W not support 1 when * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 @@ -631,16 +618,14 @@ REG_OP(Deconvolution) *\n * The following are the supported data types and data formats:\n *\n - | Tensor | x | out_backprop | y\n - ------------|---------|--------------|---------\n - | Data Type | float16 | float16 | float16\n - | |---------|--------------|---------\n - | | float32 | float32 | float32\n - | |---------|--------------|---------\n - | | float64 | float64 | float64\n - |-----------|---------|--------------|---------\n - | Format | NCHW | NCHW | NCHW\n - | | NHWC | NHWC | HWCN\n + *\n + | Tensor | x | out_backprop | y |\n + |-----------|---------|--------------|---------|\n + | Data Type | float16 | float16 | float16 |\n + | | float32 | float32 | float32 |\n + | | float64 | float64 | float64 |\n + | Format | NCHW | NCHW | NCHW |\n + | | NHWC | NHWC | HWCN |\n *\n * For float32 and float64 type of x and outbackprop, the actual calculation on the chip * is based on float16. 
@@ -662,30 +647,25 @@ REG_OP(Deconvolution) *\n * The following value range restrictions must be met:\n *\n - | Name | Field | Scope\n - -------------------|----------|--------------\n - | x(fmap) | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Filter Size | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | out_backprop | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | y | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Stride | H | [1, 63]\n - | | W | [1, 63]\n - -------------------|----------|--------------\n - | Padding | Top | [0, 255]\n - | | Bottom | [0, 255]\n - | | Left | [0, 255]\n - | | Right | [0, 255]\n - -------------------|----------|--------------\n - | Dilation | H | [1, 255]\n - | | W | [1, 255]\n + *\n + | Name | Field | Scope |\n + |------------------|----------|--------------|\n + | x(fmap) | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Filter Size | H | [1, 255] |\n + | | W | [1, 255] |\n + | out_backprop | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | y | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Stride | H | [1, 63] |\n + | | W | [1, 63] |\n + | Padding | Top | [0, 255] |\n + | | Bottom | [0, 255] |\n + | | Left | [0, 255] |\n + | | Right | [0, 255] |\n + | Dilation | H | [1, 255] |\n + | | W | [1, 255] |\n *\n *@par Outputs: * y: A Tensor. Has the same type as x, has the same format as filter_size. @@ -853,11 +833,11 @@ REG_OP(Conv2DBackpropFilterD) *@li Compatible with the Caffe operator 2D "Convolution". */ REG_OP(Conv2D) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) - .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_BF16})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_BF16})) .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_BF16})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) @@ -1441,14 +1421,13 @@ REG_OP(Conv3DTransposeD) *\n * The following are the supported data types and data formats:\n *\n - | Tensor | x | filter | bias | y\n - ------------|---------|---------|---------|--------\n - | Data Type | float16 | float16 | float16 | float16\n - | |---------|---------|---------|--------\n - | | int8 | int8 | int32 | int32\n - ------------|---------|---------|---------|--------\n - | Format | NCHW | NCHW | ND | NCHW\n - | | NHWC | HWCN | | NHWC\n + *\n + | Tensor | x | filter | bias | y |\n + |-----------|---------|---------|---------|--------|\n + | Data Type | float16 | float16 | float16 | float16|\n + | | int8 | int8 | int32 | int32 |\n + | Format | NCHW | NCHW | ND | NCHW |\n + | | NHWC | HWCN | | NHWC |\n *\n * For int8, a dequant or requant operator must be followed. 
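A rough usage sketch for the Conv2D registration above, which now also accepts bfloat16 (DT_BF16) for x, filter and y. It assumes the generated ge::op::Conv2D wrapper and the conventional set_input_*/set_attr_* setters of the GE graph-construction API; the header path and helper names are assumptions for illustration, not definitions taken from this patch:

    #include "all_ops.h"       // assumed location of the generated operator wrappers (ge::op::*)
    #include "graph/graph.h"

    // Build a one-operator graph around the Conv2D op registered above.
    ge::Graph BuildConv2dGraph(ge::Operator &data, ge::Operator &weight) {
      auto conv = ge::op::Conv2D("conv2d_0")
                      .set_input_x(data)          // feature map, e.g. DT_FLOAT16 or DT_BF16
                      .set_input_filter(weight)   // filter tensor of the matching type
                      .set_attr_strides({1, 1, 1, 1})
                      .set_attr_pads({0, 0, 0, 0})
                      .set_attr_dilations({1, 1, 1, 1});
      ge::Graph graph("conv2d_graph");
      graph.SetInputs({data}).SetOutputs({conv});
      return graph;
    }

The attribute names in the sketch mirror the REQUIRED_ATTR/ATTR entries of the registration (strides, pads, dilations).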
*\n @@ -1476,32 +1455,26 @@ REG_OP(Conv3DTransposeD) *\n * The following value range restrictions must be met:\n *\n - | Name | Field | Scope\n - -------------------|----------|--------------\n - | input_size | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | x (out_backprop) | H*strideH| [1, 200000]\n - | | W*strideW| [1, 4096]\n - -------------------|----------|--------------\n - | filter | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | y (fmap) | H | [1, 200000]\n - | | W | [1, 4096]\n - -------------------|----------|--------------\n - | Stride | H | [1, 63]\n - | | W | [1, 63]\n - -------------------|----------|--------------\n - | Padding | Top | [0, 255]\n - | | Bottom | [0, 255]\n - | | Left | [0, 255]\n - | | Right | [0, 255]\n - -------------------|----------|--------------\n - | Dilation | H | [1, 255]\n - | | W | [1, 255]\n - -------------------|----------|--------------\n - | Offset_x | | [-128, 127]\n + *\n + | Name | Field | Scope |\n + |------------------|----------|--------------|\n + | input_size | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | x (out_backprop) | H*strideH| [1, 200000] |\n + | | W*strideW| [1, 4096] |\n + | filter | H | [1, 255] |\n + | | W | [1, 255] |\n + | y (fmap) | H | [1, 200000] |\n + | | W | [1, 4096] |\n + | Stride | H | [1, 63] |\n + | | W | [1, 63] |\n + | Padding | Top | [0, 255] |\n + | | Bottom | [0, 255] |\n + | | Left | [0, 255] |\n + | | Right | [0, 255] |\n + | Dilation | H | [1, 255] |\n + | | W | [1, 255] |\n + | Offset_x | | [-128, 127] |\n *\n * In Ascend910, fmap or out_backprop's H and W not support 1 when * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index 0011c72e..b14cc49d 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -205,7 +205,8 @@ the value "5" indicates the indexes of images where the ROIs are located, "x0", *@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image. *@li sample_num: An optional attribute of type int, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0", the sampling frequency is -equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . \n +equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . +*@li roi_end_mode: An optional attribute of type int, specifying the align mode .\n *@par Outputs: *xdiff: Gradient added to input "features". Has the same 5HD shape as input "features". @@ -220,6 +221,7 @@ REG_OP(ROIAlignGrad) .REQUIRED_ATTR(pooled_height, Int) .REQUIRED_ATTR(spatial_scale, Float) .ATTR(sample_num, Int, 2) + .ATTR(roi_end_mode, Int, 1) .OP_END_FACTORY_REG(ROIAlignGrad) /** diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index 067357de..cf332e63 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -59,6 +59,25 @@ REG_OP(HardSwish) .OP_END_FACTORY_REG(HardSwish) /** +*@brief Computes the gradient for the hard_swish of "x" . \n + +* @par Inputs: +*Two inputs, including: +* @li grad: A Tensor. Must be one of the following types: float16, float32 +* @li x: A Tensor of the same type as "grad" . 
\n + +*@par Outputs: +*y: A Tensor. Has the same type as "grad". +* @par Third-party framework compatibility +* Compatible with the Torch operator HardSwishGrad. +*/ +REG_OP(HardSwishGrad) + .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OP_END_FACTORY_REG(HardSwishGrad) + +/** *@brief Computes the for the Swish of "x" . \n *@par Inputs: @@ -81,6 +100,29 @@ REG_OP(Swish) .OP_END_FACTORY_REG(Swish) /** +*@brief Computes the gradient for the Swish of "x" . \n + +*@par Inputs: +*Three inputs, including: +* @li grad: A Tensor. Must be one of the following types: float16, float32 +* @li x: A Tensor of the same type as "grad". +* @li y: A Tensor of the same type as "grad" . \n +* @par Attributes: +* scale: An optional scalar. The data type is float . \n +*@par Outputs: +*grad_x: A Tensor. Has the same type as "grad". +*@par Third-party framework compatibility +*Compatible with the Torch operator SwishGrad. +*/ +REG_OP(SwishGrad) + .INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(grad_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(scale, Float, 1.0) + .OP_END_FACTORY_REG(SwishGrad) + +/** *@brief Computes the gradient for the gelu of "x" . \n *@par Inputs: diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index 9d0e7a62..6d4bcd5e 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -274,6 +274,38 @@ REG_OP(PadV3) .ATTR(mode, String, "constant") .ATTR(paddings_contiguous, Bool, true) .OP_END_FACTORY_REG(PadV3) + + /** +*@brief Calculates the gradient of Pad. + +*@par Inputs: +*Two inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, +* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, uint32, uint64. +* @li paddings: A Tensor of type int32 or int64. + +*@par Attributes: +* @li mode: An optional string, Defaults to "reflect", indicates the padding mode, +* supports "reflect" and "edge". +* @li paddings_contiguous: An optional bool value, Defaults to true. +* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...] +* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...] + +*@par Outputs: +*y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility: +* Compatible with ONNX operator PadGrad. +*/ + +REG_OP(PadV3Grad) + .INPUT(x, TensorType::BasicType()) + .INPUT(paddings, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .ATTR(mode, String, "reflect") + .ATTR(paddings_contiguous, Bool, true) + .OP_END_FACTORY_REG(PadV3Grad) /** *@brief Pads a tensor. diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index 66f9b65f..ad7f9003 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -685,6 +685,24 @@ REG_OP(Uniform) .ATTR(from, Float, 0.0) .ATTR(to, Float, 1.0) .OP_END_FACTORY_REG(Uniform) -} // namespace ge +/** +*@brief Outputs integers consisting of 0 and 1, used for LSTM etc. \n +*@par Inputs: +* @li time_step: A tensor with data type int64. 0-D. +* @li batch_size: A tensor with data type int64. 0-D. + +*@par Outputs: +*y: A Tensor. Has the type float16 or float, 2-D, [time_step,batch_size].
\n + +*@par Third-party framework compatibility +* Compatible with the Caffe operator ContinuationIndicator. +*/ + +REG_OP(ContinuationIndicator) + .REQUIRED_ATTR(time_step, Int) + .REQUIRED_ATTR(batch_size, Int) + .OUTPUT(y, TensorType({DT_FLOAT})) + .OP_END_FACTORY_REG(ContinuationIndicator) +} // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 1578ba59..0e578d86 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -1275,7 +1275,7 @@ REG_OP(ReduceStd) * @par Attributes: -* Three Attributes, including: +* Five Attributes, including: * @li dim: An optional listint, Defaults to "None". \n * @li unbiased: An optional bool. Defaults to "True". * If "True", Use Bessel Correction. @@ -1283,9 +1283,14 @@ REG_OP(ReduceStd) * @li keepdim: An optional bool. Defaults to "False". * If "True", Keep the original tensor dimension. * If "False", Do not keep the original tensor dimension. \n +* @li invert: An optional bool. Defaults to "False". +* If "True", the output is the inverse of the variance. +* If "False", the output is the variance. +* @li epsilon: An optional float. Defaults to 0.001. +* Prevents division by 0. * @par Outputs: -* @li y: A Tensor. It's the std of X. Has the same type as "x". +* @li y: A Tensor. It's the variance of X or the reciprocal of the variance of X. Has the same type as "x". * @par Third-party framework compatibility * Compatible with the Pytorch operator ReduceStdWithMean. @@ -1297,6 +1302,8 @@ REG_OP(ReduceStdWithMean) .ATTR(dim, ListInt, {}) .ATTR(unbiased, Bool, true) .ATTR(keepdim, Bool, false) + .ATTR(invert, Bool, false) + .ATTR(epsilon, Float, 0.001) .OP_END_FACTORY_REG(ReduceStdWithMean) } //namespace ge diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index cc0bff00..b374fa5c 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -822,7 +822,7 @@ REG_OP(DynamicGRU) *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. *@li bias_input:Must be one of the following types: float16, float32. The format must be ND. *@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. -*@li seq_length:Must be one of the following types: int32. The format must be ND. +*@li seq_length:Must be one of the following types: float16 in FRACTAL_NZ and int32 in ND. *@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Attributes: @@ -852,7 +852,7 @@ REG_OP(DynamicGRUV2) .INPUT(weight_hidden, TensorType({DT_FLOAT16})) .OPTIONAL_INPUT(bias_input, TensorType({DT_FLOAT16, DT_FLOAT})) .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16})) .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -880,7 +880,7 @@ REG_OP(DynamicGRUV2) *@li x_weight_input:Must be one of the following types: float32. The format must be FRACTAL_NZ. *@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. *@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. -*@li seq_length:Must be one of the following types: int32. The format must be ND.
+*@li seq_length:Must be one of the following types: float16 in FRACTAL_NZ and int32 in ND. *@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@par Attributes: @@ -913,7 +913,7 @@ REG_OP(DynamicGRUV2Hidden) .INPUT(x_weight_input, TensorType({DT_FLOAT32})) .INPUT(weight_hidden, TensorType({DT_FLOAT16})) .OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) + .OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16})) .OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1050,6 +1050,50 @@ REG_OP(GRUV2HiddenGradCell) .OP_END_FACTORY_REG(GRUV2HiddenGradCell) /** +*@brief: DynamicGRUCellGrad calculation. +*@par Inputs: +*ten inputs: \n +*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.+ +*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li t_state:A 1D Tensor. Must be one of the following types: int32. The format must be ND. + +*@par Attributes: +*gate_order:An string identifying the gate order in weight and bias. Default to "zrh". "rzh" is another option. + +*@par Outputs: +*three outputs: \n +*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(DynamicGRUCellGrad) + .INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(update, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(new, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(t_state, TensorType({DT_INT32, DT_INT32})) + .OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(gate_order, String, "zrh") + .OP_END_FACTORY_REG(DynamicGRUCellGrad) + +/** * @brief Calculates the reversed outputs of the function "embedding". \n * @par Inputs: @@ -1137,8 +1181,8 @@ REG_OP(CommonLSTM) * * @par Inputs: * @li seq_length: A 1D Tensor. 
Must be one of the following types: int32. Record the current length of each batch. [batch_size]. - * @li b: A 1D Tensor. Must be one of the following types: fp16/fp32. Record the hidden_size. [4 * hidden_size]. * @li x: A 3D Tensor. Must be one of the following types: fp16/fp32. Record the num_step/batch_size/input_size. [num_step, batch_size, input_size]. + * @li hidden_size: An optional attribute of type int32. pass the hidden_size. \n * * @par Outputs: * seq_mask: A 3D Tensor. Must be one of the following types: fp16/fp32. with the shape of [num_step, batch_size, hidden_size]. And has the same type as "b" \n @@ -1148,8 +1192,8 @@ REG_OP(CommonLSTM) */ REG_OP(RnnGenMaskV2) .INPUT(seq_length, TensorType({DT_INT32})) - .INPUT(b, TensorType({{DT_FLOAT16, DT_FLOAT})) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(hidden_size, Int) .OUTPUT(seq_mask, TensorType({DT_FLOAT16, DT_FLOAT})) .OP_END_FACTORY_REG(RnnGenMaskV2) diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 7f7c4fc8..08fb25a3 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -1381,7 +1381,7 @@ REG_OP(InplaceUpdate) .INPUT(x, TensorType::BasicType()) .INPUT(indices, TensorType({DT_INT32})) .INPUT(v, TensorType::BasicType()) - .OUTPUT(y, TensorType::BasicType()) + .OUTPUT(x, TensorType::BasicType()) .OP_END_FACTORY_REG(InplaceUpdate) /** @@ -2408,6 +2408,40 @@ REG_OP(TopKPQDistanceMerge) .OUTPUT(topk_index, TensorType({DT_INT32})) .REQUIRED_ATTR(k, Int) .OP_END_FACTORY_REG(TopKPQDistanceMerge) + +/** +*@brief Extracts a strided slice of a tensor. Roughly speaking, this op + extracts a slice of size (end-begin)/stride from the given input tensor. + Starting at the location specified by begin the slice continues by + adding stride to the index until all dimensions are not less than end. + +*@par Inputs: +*Four inputs, including: +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, float16, uint32, uint64. +* @li begin: A Tensor of type int32 or int64, for the index of the first value to select . \n + +* @li end: A Tensor of type int32 or int64, for the index of the last value to select . \n + +* @li strides: A Tensor of type int32 or int64, for the increment . \n + +* @li axes: A Tensor of type int32 or int64, for the increment . \n + +*@par Outputs: +*y: A Tensor. Has the same type as "x" . \n + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(StridedSliceV3) + .INPUT(x, TensorType::BasicType()) + .INPUT(begin, TensorType::IndexNumberType()) + .INPUT(end, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(axes, TensorType::IndexNumberType()) + .OPTIONAL_INPUT(strides, TensorType::IndexNumberType()) + .OUTPUT(y, TensorType::BasicType()) + .OP_END_FACTORY_REG(StridedSliceV3) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 3560db11..525f60e9 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -368,8 +368,9 @@ REG_OP(SpaceToDepth) * complex128, uint32, uint64 *@par Attributes: -*Two attributes, including: +*Three attributes, including: * @li block_size: An int >= 2, specifying the size of the spatial block. 
+* @li mode: An optional string, specifying the mode. Defaults to "DCR". * @li data_format: An optional string, specifying the data format. Defaults to "NHWC" . \n *@par Outputs: @@ -382,6 +383,7 @@ REG_OP(DepthToSpace) .INPUT(x, TensorType::BasicType()) .OUTPUT(y, TensorType::BasicType()) .REQUIRED_ATTR(block_size, Int) + .ATTR(mode, String, "DCR") .ATTR(data_format, String, "NHWC") .OP_END_FACTORY_REG(DepthToSpace) diff --git a/third_party/fwkacllib/inc/ops/vector_search.h b/third_party/fwkacllib/inc/ops/vector_search.h index e3099511..d07f8cd6 100644 --- a/third_party/fwkacllib/inc/ops/vector_search.h +++ b/third_party/fwkacllib/inc/ops/vector_search.h @@ -34,7 +34,7 @@ namespace ge { * @li bucket_list: A Tensor. Must be one of the following types: int32, int64. * * @par Outputs: -* @li adc_tables: A Tensor. Must be one of the following types: float16, float32. +* adc_tables: A Tensor. Must be one of the following types: float16, float32. */ REG_OP(GenADC) .INPUT(query, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -43,6 +43,87 @@ REG_OP(GenADC) .INPUT(bucket_list, TensorType({DT_INT32, DT_INT64})) .OUTPUT(adc_tables, TensorType({DT_FLOAT16, DT_FLOAT})) .OP_END_FACTORY_REG(GenADC) + +/** +* @brief Finds values and indices of the "k" largest or least elements for the last dimension. \n +* +* @par Inputs: +* Dynamic inputs, including: +* @li actual_count: A Tensor of type int32, the actual number of pq_distance. +* @li pq_distance: A Tensor, will be updated after calculation. Must be one of the following types: float32, float16. +* @li grouped_extreme_distance: A Tensor, the extremum in each group. Must be one of the following types: float32, float16. +* @li pq_index: A Tensor of type int32, index corresponding to pq_distance. +* @li pq_ivf: A Tensor of type int32, the bucket number corresponding to pq_distance. +* +* @par Attributes: +* @li order: A string, indicates the sorting method of topk_pq_distance. \n +* @li k: Int, k maximum or minimum values. \n +* @li group_size: Int, the group size of the extremum. \n +* +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ +REG_OP(TopKPQDistance) + .DYNAMIC_INPUT(actual_count, TensorType({DT_INT32})) + .DYNAMIC_INPUT(pq_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(grouped_extreme_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .DYNAMIC_INPUT(pq_ivf, TensorType({DT_INT32})) + .DYNAMIC_INPUT(pq_index, TensorType({DT_INT32})) + .OUTPUT(topk_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(topk_ivf, TensorType({DT_INT32})) + .OUTPUT(topk_index, TensorType({DT_INT32})) + .ATTR(order, String, "ASC") + .ATTR(k, Int, 0) + .ATTR(group_size, Int, 0) + .OP_END_FACTORY_REG(TopKPQDistance) + +/** +* @brief Calculates PQ distance. \n +* +* @par Inputs: +* Six inputs, including: +* @li ivf: A Tensor, dtype is uint8. +* @li bucket_list: A Tensor, dtype is int32. +* @li bucket_base_distance: A Tensor, dtype is float16. +* @li bucket_limits: A Tensor, dtype is int32. +* @li bucket_offsets: A Tensor, dtype is int64. +* @li adc_tables: A Tensor, dtype is float16. \n +* +* @par Outputs: +* Five outputs, including: +* @li actual_count: A Tensor, dtype is int32, the first element means the length of processed ivf. +* @li pq_distance: A Tensor, dtype is float16. +* @li grouped_extreme_distance: A Tensor, dtype is float16. +* @li pq_ivf: A Tensor, dtype is int32. +* @li pq_index: A Tensor, dtype is int32.
\n +* +* @par Attributes: +* Five attributes, including: +* @li group_size: A Scalar, indicates the group size when compute grouped_extreme_distance. +* @li total_limit: A Scalar, indicates the total length of the outputs. +* @li extreme_mode: A Scalar, indicates the type of extremum, 0 means minimum, and 1 means maximum. +* @li split_count: A Scalar. +* @li split_index: A Scalar. \n +* +*/ +REG_OP(ScanPQCodes) + .INPUT(ivf, TensorType({DT_UINT8})) + .INPUT(bucket_list, TensorType({DT_INT32, DT_INT64})) + .INPUT(bucket_base_distance, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(bucket_limits, TensorType({DT_INT32})) + .INPUT(bucket_offsets, TensorType({DT_INT64})) + .INPUT(adc_tables, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(actual_count, TensorType({DT_INT32})) + .OUTPUT(pq_distance, TensorType({DT_FLOAT16})) + .OUTPUT(grouped_extreme_distance, TensorType({DT_FLOAT16})) + .OUTPUT(pq_ivf, TensorType({DT_INT32})) + .OUTPUT(pq_index, TensorType({DT_INT32})) + .REQUIRED_ATTR(total_limit, Int) + .ATTR(group_size, Int, 64) + .ATTR(extreme_mode, Int, 0) + .ATTR(split_count, Int, 1) + .ATTR(split_index, Int, 0) + .OP_END_FACTORY_REG(ScanPQCodes) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_ diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index fc2cd038..4f9b15be 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -42,7 +42,7 @@ static const int32_t RT_ERROR_NONE = 0; // success */ typedef enum tagRtDeviceMode { RT_DEVICE_MODE_SINGLE_DIE = 0, - RT_DEVICE_MODE_MULTI_DIE = 1, + RT_DEVICE_MODE_MULTI_DIE, RT_DEVICE_MODE_RESERVED } rtDeviceMode; @@ -178,7 +178,7 @@ RTS_API rtError_t rtProfilerInit(const char *profDir, const char *address, const * @ingroup profiling_base * @brief config rts profiler. */ -RTS_API rtError_t rtProfilerConfig(uint16_t type); +RTS_API rtError_t rtProfilerConfig(uint16_t profConfig); /** * @ingroup profiling_base @@ -251,18 +251,6 @@ RTS_API rtError_t rtProfRegisterCtrlCallback(uint32_t moduleId, rtProfCtrlHandle /** * @ingroup dvrt_base - * @brief Returns the last error from a runtime call. - */ -RTS_API rtError_t rtGetLastError(); - -/** - * @ingroup dvrt_base - * @brief Returns the last error from a runtime call. - */ -RTS_API rtError_t rtPeekAtLastError(); - -/** - * @ingroup dvrt_base * @brief register callback for error code * @param [out] NA * @return RT_ERROR_NONE for ok diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index 76836e7b..d8083def 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __CCE_RUNTIME_CONFIG_H__ -#define __CCE_RUNTIME_CONFIG_H__ +#ifndef CCE_RUNTIME_CONFIG_H +#define CCE_RUNTIME_CONFIG_H #include "base.h" @@ -23,28 +23,28 @@ extern "C" { #endif -#define PLAT_COMBINE(arch, chip, ver) ((arch << 16) | (chip << 8) | (ver)) -#define PLAT_GET_ARCH(type) ((type >> 16) & 0xffff) -#define PLAT_GET_CHIP(type) ((type >> 8) & 0xff) -#define PLAT_GET_VER(type) (type & 0xff) +#define PLAT_COMBINE(arch, chip, ver) (((arch) << 16U) | ((chip) << 8U) | (ver)) +#define PLAT_GET_ARCH(type) (((type) >> 16U) & 0xffffU) +#define PLAT_GET_CHIP(type) (((type) >> 8U) & 0xffU) +#define PLAT_GET_VER(type) ((type) & 0xffU) typedef enum tagRtArchType { ARCH_BEGIN = 0, ARCH_V100 = ARCH_BEGIN, - ARCH_V200, - ARCH_END, + ARCH_V200 = 1, + ARCH_END = 2, } rtArchType_t; typedef enum tagRtChipType { CHIP_BEGIN = 0, CHIP_MINI = CHIP_BEGIN, - CHIP_CLOUD, - CHIP_MDC, - CHIP_LHISI, - CHIP_DC, - CHIP_CLOUD_V2, - CHIP_NO_DEVICE, - CHIP_END, + CHIP_CLOUD = 1, + CHIP_MDC = 2, + CHIP_LHISI = 3, + CHIP_DC = 4, + CHIP_CLOUD_V2 = 5, + CHIP_NO_DEVICE = 6, + CHIP_END = 7, } rtChipType_t; typedef enum tagRtAicpuScheType { @@ -59,29 +59,32 @@ typedef enum tagRtDeviceCapabilityType { RT_SCHEDULE_HARDWARE, // HWTS Schedule RT_AICPU_BLOCKING_OP_NOT_SUPPORT, RT_AICPU_BLOCKING_OP_SUPPORT, // 1910/1980/1951 ts support AICPU blocking operation + RT_MODE_NO_FFTS, // no ffts + RT_MODE_FFTS, // 1981 get ffts work mode, ffts + RT_MODE_FFTS_PLUS, // 1981 get ffts work mode, ffts plus } rtDeviceCapabilityType; typedef enum tagRtVersion { VER_BEGIN = 0, VER_NA = VER_BEGIN, - VER_ES, - VER_CS, - VER_SD3403, - VER_END, + VER_ES = 1, + VER_CS = 2, + VER_SD3403 = 3, + VER_END = 4, } rtVersion_t; /* match rtChipType_t */ typedef enum tagRtPlatformType { PLATFORM_BEGIN = 0, PLATFORM_MINI_V1 = PLATFORM_BEGIN, - PLATFORM_CLOUD_V1, - PLATFORM_MINI_V2, - PLATFORM_LHISI_ES, - PLATFORM_LHISI_CS, - PLATFORM_DC, - PLATFORM_CLOUD_V2, - PLATFORM_LHISI_SD3403, - PLATFORM_END, + PLATFORM_CLOUD_V1 = 1, + PLATFORM_MINI_V2 = 2, + PLATFORM_LHISI_ES = 3, + PLATFORM_LHISI_CS = 4, + PLATFORM_DC = 5, + PLATFORM_CLOUD_V2 = 6, + PLATFORM_LHISI_SD3403 = 7, + PLATFORM_END = 8, } rtPlatformType_t; typedef enum tagRtCubeFracMKNFp16 { @@ -240,4 +243,4 @@ RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout); } #endif -#endif // __CCE_RUNTIME_STREAM_H__ +#endif // CCE_RUNTIME_CONFIG_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index bb6bf111..947ed093 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __CCE_RUNTIME_CONTEXT_H__ -#define __CCE_RUNTIME_CONTEXT_H__ +#ifndef CCE_RUNTIME_CONTEXT_H +#define CCE_RUNTIME_CONTEXT_H #include "base.h" @@ -173,4 +173,4 @@ RTS_API rtError_t rtSetCtxINFMode(bool mode); #endif -#endif // __CCE_RUNTIME_CONTEXT_H__ +#endif // CCE_RUNTIME_CONTEXT_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index 3d3da22e..98975f70 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -14,8 +14,8 @@ * limitations under the License. 
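A minimal sketch of how the hardened PLAT_* helpers compose and decode a platform value, using only the macros and enums defined in this header; the local variable names are illustrative:

    uint32_t plat = PLAT_COMBINE(ARCH_V200, CHIP_CLOUD_V2, VER_CS);
    uint32_t arch = PLAT_GET_ARCH(plat);  /* ARCH_V200 */
    uint32_t chip = PLAT_GET_CHIP(plat);  /* CHIP_CLOUD_V2 */
    uint32_t ver  = PLAT_GET_VER(plat);   /* VER_CS */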
*/ -#ifndef __CCE_RUNTIME_DEVICE_H__ -#define __CCE_RUNTIME_DEVICE_H__ +#ifndef CCE_RUNTIME_DEVICE_H +#define CCE_RUNTIME_DEVICE_H #include "base.h" @@ -23,8 +23,8 @@ extern "C" { #endif -#define RT_CAPABILITY_SUPPORT (0x1) -#define RT_CAPABILITY_NOT_SUPPORT (0x0) +#define RT_CAPABILITY_SUPPORT (0x1U) +#define RT_CAPABILITY_NOT_SUPPORT (0x0U) typedef struct tagRTDeviceInfo { uint8_t env_type; // 0: FPGA 1: EMU 2: ESL @@ -45,27 +45,28 @@ typedef struct tagRTDeviceInfo { typedef enum tagRtRunMode { RT_RUN_MODE_OFFLINE = 0, - RT_RUN_MODE_ONLINE = 1, - RT_RUN_MODE_AICPU_SCHED = 2, + RT_RUN_MODE_ONLINE, + RT_RUN_MODE_AICPU_SCHED, RT_RUN_MODE_RESERVED } rtRunMode; typedef enum tagRtAicpuDeployType { AICPU_DEPLOY_CROSS_OS = 0x0, - AICPU_DEPLOY_CROSS_PROCESS = 0x1, - AICPU_DEPLOY_CROSS_THREAD = 0x2, + AICPU_DEPLOY_CROSS_PROCESS, + AICPU_DEPLOY_CROSS_THREAD, AICPU_DEPLOY_RESERVED } rtAicpuDeployType_t; typedef enum tagRtFeatureType { FEATURE_TYPE_MEMCPY = 0, - FEATURE_TYPE_MEMORY = 1, + FEATURE_TYPE_MEMORY, FEATURE_TYPE_RSV } rtFeatureType_t; typedef enum tagRtDeviceFeatureType { FEATURE_TYPE_SCHE, FEATURE_TYPE_BLOCKING_OPERATOR, + FEATURE_TYPE_FFTS_MODE, FEATURE_TYPE_END, } rtDeviceFeatureType_t; @@ -90,6 +91,15 @@ typedef enum tagRtDeviceModuleType { RT_MODULE_TYPE_VECTOR_CORE, /**< VECTOR CORE info*/ } rtDeviceModuleType_t; +// used for rtGetDevMsg callback function +typedef void (*rtGetMsgCallback)(const char *msg, uint32_t len); + +typedef enum tagGetDevMsgType { + RT_GET_DEV_ERROR_MSG = 0, + RT_GET_DEV_RUNNING_STREAM_SNAPSHOT_MSG, + RT_GET_DEV_MSG_RESERVE +} rtGetDevMsgType_t; + /** * @ingroup dvrt_dev * @brief get total device number. @@ -408,8 +418,17 @@ RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device); */ RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device); +/** + * @ingroup dvrt_dev + * @brief get device message + * @param [in] rtGetDevMsgType_t getMsgType:msg type + * @param [in] GetMsgCallback callback:acl callback function + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtGetDevMsg(rtGetDevMsgType_t getMsgType, rtGetMsgCallback callback); #if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_DEVICE_H__ +#endif // CCE_RUNTIME_DEVICE_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/dvfsprofile.h b/third_party/fwkacllib/inc/runtime/dvfsprofile.h index 33e2f4c1..b0caaf2d 100644 --- a/third_party/fwkacllib/inc/runtime/dvfsprofile.h +++ b/third_party/fwkacllib/inc/runtime/dvfsprofile.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __CCE_RUNTIME_DVFSPROFILE_H__ -#define __CCE_RUNTIME_DVFSPROFILE_H__ +#ifndef CCE_RUNTIME_DVFSPROFILE_H +#define CCE_RUNTIME_DVFSPROFILE_H #include "base.h" @@ -60,4 +60,4 @@ RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode); } #endif -#endif // __CCE_RUNTIME_PROFILE_H__ +#endif // CCE_RUNTIME_DVFSPROFILE_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index 81b635c3..3c1f2670 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -14,8 +14,8 @@ * limitations under the License. 
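A short sketch of how the new rtGetDevMsg interface declared above might be used; the callback body and the choice of stderr are illustrative assumptions, not part of the patch:

    #include <stdio.h>

    static void DevMsgDump(const char *msg, uint32_t len) {
        (void)fwrite(msg, 1U, (size_t)len, stderr);  /* forward the device message */
    }

    /* somewhere after device setup */
    (void)rtGetDevMsg(RT_GET_DEV_ERROR_MSG, DevMsgDump);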
*/ -#ifndef __CCE_RUNTIME_EVENT_H__ -#define __CCE_RUNTIME_EVENT_H__ +#ifndef CCE_RUNTIME_EVENT_H +#define CCE_RUNTIME_EVENT_H #include "base.h" @@ -33,8 +33,8 @@ typedef enum rtEventWaitStatus { * @ingroup event_flags * @brief event op bit flags */ -#define RT_EVENT_DEFAULT (0x0E) -#define RT_EVENT_WITH_FLAG (0x0B) +#define RT_EVENT_DEFAULT (0x0EU) +#define RT_EVENT_WITH_FLAG (0x0BU) #define RT_EVENT_DDSYNC_NS 0x01U #define RT_EVENT_STREAM_MARK 0x02U @@ -200,14 +200,14 @@ RTS_API rtError_t rtNotifyWait(rtNotify_t notify, rtStream_t stream); /** * @ingroup dvrt_event * @brief Wait for a notify with time out - * @param [in] notify_ notify to be wait - * @param [in] stream_ input stream + * @param [in] notify notify to be wait + * @param [in] stream input stream * @param [in] timeOut input timeOut * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx */ -RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify_, rtStream_t stream_, uint32_t timeOut); +RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify, rtStream_t stream, uint32_t timeOut); /** * @ingroup dvrt_event @@ -270,10 +270,10 @@ RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffs * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num); +RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int32_t num); #if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_EVENT_H__ +#endif // CCE_RUNTIME_EVENT_H diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index f33b51d3..a0ccff73 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __CCE_RUNTIME_KERNEL_H__ -#define __CCE_RUNTIME_KERNEL_H__ +#ifndef CCE_RUNTIME_KERNEL_H +#define CCE_RUNTIME_KERNEL_H #include "base.h" #include "stream.h" @@ -131,7 +131,10 @@ typedef struct tagRtArgsWithTiling { uint32_t argsSizeWithoutTiling; // input + output + tiling addr size uint16_t tilingAddrOffset; // tiling addr offset uint16_t tilingDataOffset; // tiling data offset - uint16_t reserved[2]; + uint16_t hostInputAddrOffset; // index of host_memory input in inputs_addrs list + uint16_t hostInputDataOffset; // host_mem input data offset + bool hasHostMemInput; // has host_memory input data in args or not: ture or false + uint8_t reserved[7]; } rtArgsWithTiling_t; /** @@ -141,7 +144,7 @@ typedef struct tagRtArgsWithTiling { typedef enum tagRtDumpKind { RT_DATA_DUMP_KIND_INVALID = -1, RT_DATA_DUMP_KIND_DUMP = 0, - RT_DATA_DUMP_KIND_RESERVED + RT_DATA_DUMP_KIND_RESERVED = 1, } rtDumpKind_t; /** @@ -160,72 +163,72 @@ typedef void (*rtCallback_t)(void *fnData); * @ingroup rt_kernel * @brief magic number of plain binary for aicore */ -#define RT_DEV_BINARY_MAGIC_PLAIN 0xabceed50 +#define RT_DEV_BINARY_MAGIC_PLAIN 0xabceed50U /** * @ingroup rt_kernel * @brief magic number of plain binary for aicpu */ -#define RT_DEV_BINARY_MAGIC_PLAIN_AICPU 0xabceed51 +#define RT_DEV_BINARY_MAGIC_PLAIN_AICPU 0xabceed51U /** * @ingroup rt_kernel * @brief magic number of plain binary for aivector */ -#define RT_DEV_BINARY_MAGIC_PLAIN_AIVEC 0xabceed52 +#define RT_DEV_BINARY_MAGIC_PLAIN_AIVEC 0xabceed52U /** * @ingroup rt_kernel * @brief magic number of elf binary for aicore */ -#define RT_DEV_BINARY_MAGIC_ELF 0x43554245 +#define RT_DEV_BINARY_MAGIC_ELF 0x43554245U /** * @ingroup rt_kernel * @brief magic number of elf binary for aicpu */ -#define RT_DEV_BINARY_MAGIC_ELF_AICPU 0x41415243 +#define RT_DEV_BINARY_MAGIC_ELF_AICPU 0x41415243U /** * @ingroup rt_kernel * @brief magic number of elf binary for aivector */ -#define RT_DEV_BINARY_MAGIC_ELF_AIVEC 0x41415246 +#define RT_DEV_BINARY_MAGIC_ELF_AIVEC 0x41415246U /** * @ingroup rt_kernel * @brief magic number of elf binary for aicube */ -#define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41494343 +#define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41494343U /** * @ingroup rt_kernel_flags * @brief kernel op bit flags */ -#define RT_KERNEL_DEFAULT (0x00) -#define RT_KERNEL_CONVERT (0x01) -#define RT_KERNEL_DUMPFLAG (0x02) -#define RT_FUSION_KERNEL_DUMPFLAG (0x04) -#define RT_KERNEL_CUSTOM_AICPU (0x08) +#define RT_KERNEL_DEFAULT (0x00U) +#define RT_KERNEL_CONVERT (0x01U) +#define RT_KERNEL_DUMPFLAG (0x02U) +#define RT_FUSION_KERNEL_DUMPFLAG (0x04U) +#define RT_KERNEL_CUSTOM_AICPU (0x08U) // STARS topic scheduler sqe : topic_type -#define RT_KERNEL_DEVICE_FIRST (0x10) -#define RT_KERNEL_HOST_ONLY (0x20) -#define RT_KERNEL_HOST_FIRST (0x40) +#define RT_KERNEL_DEVICE_FIRST (0x10U) +#define RT_KERNEL_HOST_ONLY (0x20U) +#define RT_KERNEL_HOST_FIRST (0x40U) /** * @ingroup rt_kernel * @brief kernel mode **/ -#define RT_DEFAULT_KERNEL_MODE (0x00) -#define RT_NORMAL_KERNEL_MODE (0x01) -#define RT_ALL_KERNEL_MODE (0x02) +#define RT_DEFAULT_KERNEL_MODE (0x00U) +#define RT_NORMAL_KERNEL_MODE (0x01U) +#define RT_ALL_KERNEL_MODE (0x02U) /** * @ingroup rt_kernel * @brief kernel L1 Fusion Dump bit flags */ -#define RT_DDR_ADDR (0x0) +#define RT_DDR_ADDR (0x0U) /** * @ingroup rt_kernel @@ -672,7 +675,7 @@ RTS_API rtError_t rtStopMDCProfiler(void *addr); * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockDim, - 
rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream_); + rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream); /** * @ingroup rt_kernel @@ -688,11 +691,11 @@ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockD * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelLaunchWithHandleAndTiling(void *handle, const void *devFunc, uint32_t blockDim, - rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream_, const void* kernelInfo); + rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream, const void* kernelInfo); #if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_KERNEL_H__ +#endif // CCE_RUNTIME_KERNEL_H diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index b049e762..473a203a 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -14,12 +14,10 @@ * limitations under the License. */ -#ifndef __CCE_RUNTIME_MEM_H__ -#define __CCE_RUNTIME_MEM_H__ +#ifndef CCE_RUNTIME_MEM_H +#define CCE_RUNTIME_MEM_H -/*lint -e7*/ #include -/*lint +e7*/ #include "base.h" #include "config.h" #include "stream.h" @@ -32,43 +30,43 @@ extern "C" { * @ingroup dvrt_mem * @brief memory type */ -#define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device -#define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device -#define RT_MEMORY_RDMA_HBM ((uint32_t)0x3) // RDMA-HBM memory on device -#define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device -#define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device -#define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device -#define RT_MEMORY_P2P_DDR ((uint32_t)0x11) // DDR memory on other device -#define RT_MEMORY_DDR_NC ((uint32_t)0x20) // DDR memory of non-cache -#define RT_MEMORY_TS_4G ((uint32_t)0x40) -#define RT_MEMORY_TS ((uint32_t)0x80) -#define RT_MEMORY_RESERVED ((uint32_t)0x100) +#define RT_MEMORY_DEFAULT (0x0U) // default memory on device +#define RT_MEMORY_HBM (0x2U) // HBM memory on device +#define RT_MEMORY_RDMA_HBM (0x3U) // RDMA-HBM memory on device +#define RT_MEMORY_DDR (0x4U) // DDR memory on device +#define RT_MEMORY_SPM (0x8U) // shared physical memory on device +#define RT_MEMORY_P2P_HBM (0x10U) // HBM memory on other 4P device +#define RT_MEMORY_P2P_DDR (0x11U) // DDR memory on other device +#define RT_MEMORY_DDR_NC (0x20U) // DDR memory of non-cache +#define RT_MEMORY_TS_4G (0x40U) +#define RT_MEMORY_TS (0x80U) +#define RT_MEMORY_RESERVED (0x100U) -#define RT_MEMORY_L1 ((uint32_t)0x1<<16) -#define RT_MEMORY_L2 ((uint32_t)0x1<<17) +#define RT_MEMORY_L1 (0x1U << 16U) +#define RT_MEMORY_L2 (0x1U << 17U) /** * @ingroup dvrt_mem * @brief memory info type */ -#define RT_MEM_INFO_TYPE_DDR_SIZE ((uint32_t)0x1) -#define RT_MEM_INFO_TYPE_HBM_SIZE ((uint32_t)0x2) -#define RT_MEM_INFO_TYPE_DDR_P2P_SIZE ((uint32_t)0x3) -#define RT_MEM_INFO_TYPE_HBM_P2P_SIZE ((uint32_t)0x4) +#define RT_MEM_INFO_TYPE_DDR_SIZE (0x1U) +#define RT_MEM_INFO_TYPE_HBM_SIZE (0x2U) +#define RT_MEM_INFO_TYPE_DDR_P2P_SIZE (0x3U) +#define RT_MEM_INFO_TYPE_HBM_P2P_SIZE (0x4U) /** * @ingroup dvrt_mem * @brief memory Policy */ -#define RT_MEMORY_POLICY_NONE ((uint32_t)0x0) // Malloc mem prior hage page, then default page -#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST ((uint32_t)0x1 << 10) // Malloc mem prior hage page, then default page -#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY ((uint32_t)0x1 << 11) // Malloc mem only use hage page -#define 
RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY ((uint32_t)0x1 << 12) // Malloc mem only use default page -#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P ((uint32_t)0x1 << 13) // Malloc mem prior hage page, then default page, use for p2p -#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P ((uint32_t)0x1 << 14) // Malloc mem only use hage page, use for p2p -#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P ((uint32_t)0x1 << 15) // Malloc mem only use default page, use for p2p +#define RT_MEMORY_POLICY_NONE (0x0U) // Malloc mem prior huge page, then default page +#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST (0x1U << 10U) // Malloc mem prior huge page, then default page +#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY (0x1U << 11U) // Malloc mem only use huge page +#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY (0x1U << 12U) // Malloc mem only use default page +#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P (0x1U << 13U) // Malloc mem prior huge page, then default page, for p2p +#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P (0x1U << 14U) // Malloc mem only use huge page, use for p2p +#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P (0x1U << 15U) // Malloc mem only use default page, use for p2p -#define MEM_ALLOC_TYPE_BIT ((uint32_t)0x3FF) // mem type bit in <0, 9> +#define MEM_ALLOC_TYPE_BIT (0x3FFU) // mem type bit in <0, 9> /** * @ingroup dvrt_mem @@ -80,10 +78,10 @@ typedef uint32_t rtMemType_t; * @ingroup dvrt_mem * @brief memory advise type */ -#define RT_MEMORY_ADVISE_EXE (0x02) -#define RT_MEMORY_ADVISE_THP (0x04) -#define RT_MEMORY_ADVISE_PLE (0x08) -#define RT_MEMORY_ADVISE_PIN (0x16) +#define RT_MEMORY_ADVISE_EXE (0x02U) +#define RT_MEMORY_ADVISE_THP (0x04U) +#define RT_MEMORY_ADVISE_PLE (0x08U) +#define RT_MEMORY_ADVISE_PIN (0x16U) /** * @ingroup dvrt_mem @@ -119,7 +117,7 @@ typedef enum tagRtRecudeKind { RT_MEMCPY_SDMA_AUTOMATIC_MAX = 11, RT_MEMCPY_SDMA_AUTOMATIC_MIN = 12, RT_MEMCPY_SDMA_AUTOMATIC_EQUAL = 13, - RT_RECUDE_KIND_END + RT_RECUDE_KIND_END = 14, } rtRecudeKind_t; typedef enum tagRtDataType { @@ -134,7 +132,7 @@ typedef enum tagRtDataType { RT_DATA_TYPE_UINT8 = 8, // uint8 RT_DATA_TYPE_UINT16= 9, // uint16 RT_DATA_TYPE_UINT32= 10,// uint32 - RT_DATA_TYPE_END + RT_DATA_TYPE_END = 11, } rtDataType_t; /** @@ -197,7 +195,7 @@ typedef struct rtMallocHostSharedMemoryIn { } rtMallocHostSharedMemoryIn; typedef struct rtMallocHostSharedMemoryOut { - int fd; + int32_t fd; void *ptr; void *devPtr; } rtMallocHostSharedMemoryOut; @@ -205,7 +203,7 @@ typedef struct rtMallocHostSharedMemoryOut { typedef struct rtFreeHostSharedMemoryIn { const char *name; const uint64_t size; - int fd; + int32_t fd; void *ptr; void *devPtr; } rtFreeHostSharedMemoryIn; @@ -384,6 +382,39 @@ RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, ui /** * @ingroup dvrt_mem + * @brief synchronized memcpy2D + * @param [in] dst destination address pointer + * @param [in] dstPitch pitch of destination memory + * @param [in] src source address pointer + * @param [in] srcPitch pitch of source memory + * @param [in] width width of matrix transfer + * @param [in] height height of matrix transfer + * @param [in] kind memcpy type + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMemcpy2d(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width, + uint64_t height, rtMemcpyKind_t kind); + +/** + * @ingroup dvrt_mem + * @brief asynchronized memcpy2D + * @param [in] dst destination address pointer + * @param [in] dstPitch length of destination address memory + * 
@param [in] src source address pointer + * @param [in] srcPitch length of destination address memory + * @param [in] width width of matrix transfer + * @param [in] height height of matrix transfer + * @param [in] kind memcpy type + * @param [in] stream asynchronized task stream + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtMemcpy2dAsync(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width, + uint64_t height, rtMemcpyKind_t kind, rtStream_t stream); + +/** + * @ingroup dvrt_mem * @brief query memory size * @param [in] aiCoreMemorySize * @return RT_ERROR_NONE for ok, errno for failed @@ -429,22 +460,22 @@ RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t value, uin /** * @ingroup dvrt_mem * @brief get current device memory total and free - * @param [out] free - * @param [out] total + * @param [out] freeSize + * @param [out] totalSize * @return RT_ERROR_NONE for ok, errno for failed * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtMemGetInfo(size_t *free, size_t *total); +RTS_API rtError_t rtMemGetInfo(size_t *freeSize, size_t *totalSize); /** * @ingroup dvrt_mem * @brief get current device memory total and free * @param [in] memInfoType - * @param [out] free - * @param [out] total + * @param [out] freeSize + * @param [out] totalSize * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *free, size_t *total); +RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *freeSize, size_t *totalSize); /** * @ingroup dvrt_mem @@ -551,4 +582,4 @@ RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t str } #endif -#endif // __CCE_RUNTIME_MEM_H__ +#endif // CCE_RUNTIME_MEM_H diff --git a/third_party/fwkacllib/inc/runtime/rt.h b/third_party/fwkacllib/inc/runtime/rt.h index 6c2f5318..8c236dcd 100644 --- a/third_party/fwkacllib/inc/runtime/rt.h +++ b/third_party/fwkacllib/inc/runtime/rt.h @@ -14,8 +14,8 @@ * limitations under the License. 
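A minimal sketch of a pitched 2D copy through the rtMemcpy2d interface declared above; the pointers, pitches, and the RT_MEMCPY_HOST_TO_DEVICE kind (taken from the existing rtMemcpyKind_t in this header) are assumed for illustration:

    /* copy a widthBytes x height sub-block; dstPitch/srcPitch are full row
       strides in bytes and must be >= widthBytes on both sides */
    rtError_t ret = rtMemcpy2d(devPtr, devPitch, hostPtr, hostPitch,
                               widthBytes, height, RT_MEMCPY_HOST_TO_DEVICE);
    if (ret != RT_ERROR_NONE) {
        /* handle the error, e.g. log and release resources */
    }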
*/ -#ifndef __CCE_RUNTIME_RT_H__ -#define __CCE_RUNTIME_RT_H__ +#ifndef CCE_RUNTIME_RT_H +#define CCE_RUNTIME_RT_H #include "base.h" #include "config.h" @@ -32,4 +32,4 @@ #include "rt_ffts_plus.h" #include "rt_dfx.h" -#endif // __CCE_RUNTIME_RT_H__ +#endif // CCE_RUNTIME_RT_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts.h b/third_party/fwkacllib/inc/runtime/rt_ffts.h index f2809218..11164757 100644 --- a/third_party/fwkacllib/inc/runtime/rt_ffts.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts.h @@ -3,8 +3,8 @@ * Description: ffts interface */ -#ifndef __CCE_RUNTIME_FFTS_H -#define __CCE_RUNTIME_FFTS_H +#ifndef CCE_RUNTIME_RT_FFTS_H +#define CCE_RUNTIME_RT_FFTS_H #include "base.h" @@ -33,7 +33,7 @@ typedef enum tagFftsSubTaskType { RT_FFTS_SUB_TASK_TYPE_MIX_AIC = 6, RT_FFTS_SUB_TASK_TYPE_MIX_AIV = 7, RT_FFTS_SUB_TASK_TYPE_SDMA = 8, - RT_FFTS_SUB_TASK_TYPE_RESERVED, + RT_FFTS_SUB_TASK_TYPE_RESERVED = 9, } rtFftsSubTaskType_t; typedef struct tagManualThreadDmuInfo { @@ -178,7 +178,9 @@ typedef struct tagFftsTaskInfo { RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream); +RTS_API rtError_t rtFftsTaskLaunchWithFlag(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream, uint32_t flag); + #if defined(__cplusplus) } #endif -#endif // __CCE_RUNTIME_FFTS_H +#endif // CCE_RUNTIME_RT_FFTS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h b/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h index 61eee9f3..343701a2 100644 --- a/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts_plus.h @@ -3,8 +3,8 @@ * Description: ffts plus interface */ -#ifndef __CCE_RUNTIME_FFTS_PLUS_H -#define __CCE_RUNTIME_FFTS_PLUS_H +#ifndef CCE_RUNTIME_RT_FFTS_PLUS_H +#define CCE_RUNTIME_RT_FFTS_PLUS_H #include "base.h" #include "rt_ffts_plus_define.h" @@ -26,9 +26,13 @@ typedef struct tagFftsPlusTaskInfo { #pragma pack(pop) RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *handle, const void *devFunc, void **addr, uint32_t *prefetchCnt); + RTS_API rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream); +RTS_API rtError_t rtFftsPlusTaskLaunchWithFlag(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream, + uint32_t flag); + #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif -#endif // __CCE_RUNTIME_FFTS_H +#endif // CCE_RUNTIME_RT_FFTS_PLUS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h b/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h index 9887b943..36276b4c 100644 --- a/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h +++ b/third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h @@ -3,8 +3,8 @@ * Description: the definition of ffts plus */ -#ifndef __CCE_RUNTIME_FFTS_PLUS_DEFINE_H -#define __CCE_RUNTIME_FFTS_PLUS_DEFINE_H +#ifndef CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H +#define CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H #include "base.h" @@ -30,7 +30,7 @@ typedef enum tagFftsPlusHwType { RT_HW_CTX_TYPE_WRITEBACK_DATA = 11, RT_HW_CTX_TYPE_AICPU = 12, RT_HW_CTX_TYPE_LOAD = 13, - RT_HW_CTX_TYPE_MAX, + RT_HW_CTX_TYPE_MAX = 14, } rtFftsPlusHwType_t; // hardware context type @@ -40,7 +40,7 @@ typedef enum tagFftsPlusSoftType { RT_SOFT_CTX_TYPE_AT_START = 3, RT_SOFT_CTX_TYPE_AT_END = 4, RT_SOFT_CTX_TYPE_LABEL = 5, - RT_SOFT_CTX_TYPE_MAX, + RT_SOFT_CTX_TYPE_MAX = 6, } rtFftsPlusSoftType_t; typedef enum tagFftsPlusContextType { @@ -71,7 +71,7 @@ typedef enum 
tagFftsPlusCondType { RT_COND_TYPE_GREATER_OR_EQUAL = 3, RT_COND_TYPE_LESS = 4, RT_COND_TYPE_LESS_OR_EQUAL = 5, - RT_COND_TYPE_MAX, + RT_COND_TYPE_MAX = 6, } rtFftsPlusCondType_t; // the definition of ffts plus context @@ -505,7 +505,7 @@ typedef struct tagFftsPlusAtStartCtx { uint16_t threadIdInit; uint16_t threadWindowSize; // 80-127 - uint16_t res9[12]; + uint32_t res9[12]; } rtFftsPlusAtStartCtx_t; // at end context @@ -712,4 +712,4 @@ typedef struct tagFftsPlusCondSwitchCtx { #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif -#endif // __CCE_RUNTIME_FFTS_PLUS_DEFINE_H +#endif // CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_mem_queue.h b/third_party/fwkacllib/inc/runtime/rt_mem_queue.h new file mode 100644 index 00000000..70bfb9f3 --- /dev/null +++ b/third_party/fwkacllib/inc/runtime/rt_mem_queue.h @@ -0,0 +1,416 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * Description: mbuf and queue interface + */ + +#ifndef CCE_RUNTIME_RT_MEM_QUEUE_H +#define CCE_RUNTIME_RT_MEM_QUEUE_H + +#include "base.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define RT_MQ_MAX_NAME_LEN 128 // same as driver's +#define RT_MQ_DEPTH_MIN 2U +#define RT_MQ_MODE_PUSH 1 +#define RT_MQ_MODE_PULL 2 +#define RT_MQ_MODE_DEFAULT RT_MQ_MODE_PUSH + +typedef struct tagMemQueueAttr { + char name[RT_MQ_MAX_NAME_LEN]; + uint32_t depth; + uint32_t workMode; + uint32_t flowCtrlDropTime; + bool flowCtrlFlag; + bool overWriteFlag; +} rtMemQueueAttr_t; + +typedef struct tagMemQueueShareAttr { + uint32_t manage : 1; + uint32_t read : 1; + uint32_t write : 1; + uint32_t rsv : 29; +} rtMemQueueShareAttr_t; + +typedef struct tagMemQueueBuffInfo { + void *addr; + size_t len; +} rtMemQueueBuffInfo; + +typedef struct tagMemQueueBuff { + void *contextAddr; + size_t contextLen; + rtMemQueueBuffInfo *buffInfo; + uint32_t buffCount; +} rtMemQueueBuff_t; + + +typedef enum tagMemQueueQueryCmd { + RT_MQ_QUERY_QUE_ATTR_OF_CUR_PROC = 0, // input is qid(4bytes), output is rtMemQueueShareAttr_t + RT_MQ_QUERY_QUES_OF_CUR_PROC = 1, + RT_MQ_QUERY_CMD_MAX = 2 +} rtMemQueueQueryCmd_t; + +#define RT_MQ_EVENT_QS_MSG 27 // same as driver's + +#define RT_MQ_SCHED_PRIORITY_LEVEL0 0 // same as driver's +#define RT_MQ_SCHED_PRIORITY_LEVEL1 1 +#define RT_MQ_SCHED_PRIORITY_LEVEL2 2 +#define RT_MQ_SCHED_PRIORITY_LEVEL3 3 +#define RT_MQ_SCHED_PRIORITY_LEVEL4 4 +#define RT_MQ_SCHED_PRIORITY_LEVEL5 5 +#define RT_MQ_SCHED_PRIORITY_LEVEL6 6 +#define RT_MQ_SCHED_PRIORITY_LEVEL7 7 + +/* Events can be released between different systems. This parameter specifies the destination type of events + to be released. The destination type is defined based on the CPU type of the destination system. */ +#define RT_MQ_DST_ENGINE_ACPU_DEVICE 0 // device AICPU, same as driver's +#define RT_MQ_DST_ENGINE_ACPU_HOST 1 // Host AICPU +#define RT_MQ_DST_ENGINE_CCPU_DEVICE 2 // device CtrlCPU +#define RT_MQ_DST_ENGINE_CCPU_HOST 3 // Host CtrlCPU +#define RT_MQ_DST_ENGINE_DCPU_DEVICE 4 // device DataCPU +#define RT_MQ_DST_ENGINE_TS_CPU 5 // device TS CPU +#define RT_MQ_DST_ENGINE_DVPP_CPU 6 // device DVPP CPU + +#define RT_MQ_SCHED_EVENT_QS_MSG 25 // same as driver's EVENT_QS_MSG + +/* When the destination engine is AICPU, select a policy. + ONLY: The command is executed only on the local AICPU. + FIRST: The local AICPU is preferentially executed. If the local AICPU is busy, the remote AICPU can be used. 
*/ +#define RT_SCHEDULE_POLICY_ONLY 0 // same as driver's schedule_policy +#define RT_SCHEDULE_POLICY_FIRST 1 // same as driver's schedule_policy + + +typedef struct tagEschedEventSummary { + int32_t pid; // dst PID + uint32_t grpId; + int32_t eventId; // only RT_MQ_SCHED_EVENT_QS_MSG is supported + uint32_t subeventId; + uint32_t msgLen; + char *msg; + uint32_t dstEngine; // dst system cpu type + int32_t policy; // RT_SCHEDULE_POLICY_ONLY or RT_SCHEDULE_POLICY_FIRST +} rtEschedEventSummary_t; + +typedef struct tagEschedEventReply { + char *buf; + uint32_t bufLen; + uint32_t replyLen; // output, ack msg len, same with msgLen in halEschedAckEvent +} rtEschedEventReply_t; + +#define RT_DEV_PROCESS_CP1 0 +#define RT_DEV_PROCESS_CP2 1 +#define RT_DEV_PROCESS_DEV_ONLY 2 +#define RT_DEV_PROCESS_QS 3 +#define RT_DEV_PROCESS_SIGN_LENGTH 49 + +typedef struct tagBindHostpidInfo { + int32_t hostPid; + uint32_t vfid; + uint32_t chipId; + int32_t mode; // online:0, offline:1 + int32_t cpType; // type of custom-process, see RT_DEV_PROCESS_XXX + uint32_t len; // lenth of sign + char sign[RT_DEV_PROCESS_SIGN_LENGTH]; // sign of hostpid +} rtBindHostpidInfo_t; + +#define RT_MEM_BUFF_MAX_CFG_NUM 64 + +typedef struct { + uint32_t cfgId; // cfg id, start from 0 + uint32_t totalSize; // one zone total size + uint32_t blkSize; // blk size, 2^n (0, 2M] + uint32_t maxBufSize; // max size can alloc from zone + uint32_t pageType; // page type, small page / huge page + int32_t elasticEnable; // elastic enable + int32_t elasticRate; + int32_t elasticRateMax; + int32_t elasticHighLevel; + int32_t elasticLowLevel; +} rtMemZoneCfg_t; + +typedef struct { + rtMemZoneCfg_t cfg[RT_MEM_BUFF_MAX_CFG_NUM]; +}rtMemBuffCfg_t; + +typedef void *rtMbufPtr_t; + +/** + * @ingroup rt_mem_queue + * @brief init queue schedule + * @param [in] device the logical device id + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueInitQS(int32_t device); + +/** + * @ingroup rt_mem_queue + * @brief create mbuf queue + * @param [in] device the logical device id + * @param [in] rtMemQueueAttr attribute of queue + * @param [out] qid queue id + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueCreate(int32_t device, const rtMemQueueAttr_t *queueAttr, uint32_t *qid); + +/** + * @ingroup rt_mem_queue + * @brief destroy mbuf queue + * @param [in] device the logical device id + * @param [in] qid queue id + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueDestroy(int32_t device, uint32_t qid); + +/** + * @ingroup rt_mem_queue + * @brief destroy mbuf queue init + * @param [in] device the logical device id + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueInit(int32_t device); + +/** + * @ingroup rt_mem_queue + * @brief enqueu mbuf + * @param [in] device the logical device id + * @param [in] qid queue id + * @param [in] mbuf enqueue mbuf + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueEnQueue(int32_t device, uint32_t qid, void *mbuf); + + +/** + * @ingroup rt_mem_queue + * @brief enqueu mbuf + * @param [in] device the logical device id + * @param [in] qid queue id + * @param [out] mbuf dequeue mbuf + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueDeQueue(int32_t device, uint32_t qid, void **mbuf); + +/** + * @ingroup rt_mem_queue + * @brief enqueu peek + * @param [in] device the logical device id + * @param [in] qid queue id + * @param [out] bufLen length of mbuf in queue + * @param [in] timeout peek timeout (ms), -1: wait all the time until peeking success + 
* @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueuePeek(int32_t device, uint32_t qid, size_t *bufLen, int32_t timeout); + +/** + * @ingroup rt_mem_queue + * @brief enqueu buff + * @param [in] device the logical device id + * @param [in] qid queue id + * @param [in] inBuf enqueue buff + * @param [in] timeout enqueue timeout (ms), -1: wait all the time until enqueue success + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueEnQueueBuff(int32_t device, uint32_t qid, rtMemQueueBuff_t *inBuf, int32_t timeout); + +/** + * @ingroup rt_mem_queue + * @brief enqueu buff + * @param [in] device the logical device id + * @param [in] qid queue id + * @param [out] outBuf dequeue buff + * @param [in] timeout dequeue timeout (ms), -1: wait all the time until dequeue success + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtMemQueueDeQueueBuff(int32_t device, uint32_t qid, rtMemQueueBuff_t *outBuf, int32_t timeout); + + +/** +* @ingroup rt_mem_queue +* @brief query queue status +* @param [in] device: the logical device id +* @param [in] cmd: query cmd +* @param [in] inBuff: input buff +* @param [in] inLen: the length of input +* @param [in|out] outBuff: output buff +* @param [in|out] outLen: the length of output +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMemQueueQuery(int32_t device, rtMemQueueQueryCmd_t cmd, const void *inBuff, uint32_t inLen, + void *outBuff, uint32_t *outLen); + +/** +* @ingroup rt_mem_queue +* @brief grant queue +* @param [in] device: logic devid +* @param [in] qid: queue id +* @param [in] pid: pid +* @param [in] attr: queue share attr +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMemQueueGrant(int32_t device, uint32_t qid, int32_t pid, rtMemQueueShareAttr_t *attr); + +/** +* @ingroup rt_mem_queue +* @brief attach queue +* @param [in] device: logic devid +* @param [in] qid: queue id +* @param [in] timeOut: timeOut +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMemQueueAttach(int32_t device, uint32_t qid, int32_t timeOut); + +/** +* @ingroup rt_mem_queue +* @brief Commit the event to a specific process +* @param [in] device: logic devid +* @param [in] event: event summary info +* @param [out] ack: event reply info +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtEschedSubmitEventSync(int32_t device, rtEschedEventSummary_t *event, + rtEschedEventReply_t *ack); + +/** +* @ingroup rt_mem_queue +* @brief query device proccess id +* @param [in] info: see struct rtBindHostpidInfo_t +* @param [out] devPid: device proccess id +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtQueryDevPid(rtBindHostpidInfo_t *info, int32_t *devPid); + +/** +* @ingroup rt_mem_queue +* @brief device buff init +* @param [in] cfg, init cfg +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufInit(rtMemBuffCfg_t *cfg); + +/** +* @ingroup rt_mem_queue +* @brief alloc buff +* @param [out] buff: buff addr alloced +* @param [in] size: The amount of memory space requested +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *mbuf, uint64_t size); + +/** +* @ingroup rt_mem_queue +* @brief free buff +* @param [in] buff: buff addr to be freed +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufFree(rtMbufPtr_t mbuf); + +/** +* @ingroup rt_mem_queue +* @brief get Data addr of Mbuf +* @param [in] mbuf: Mbuf addr +* @param [out] buf: Mbuf data addr +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufGetBuffAddr(rtMbufPtr_t mbuf, void **buf); + +/** +* @ingroup rt_mem_queue +* @brief get 
total Buffer size of Mbuf +* @param [in] mbuf: Mbuf addr +* @param [out] totalSize: total buffer size of Mbuf +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t mbuf, uint64_t *totalSize); + +/** +* @ingroup rt_mem_queue +* @brief Get the address and length of its user_data from the specified Mbuf +* @param [in] mbuf: Mbuf addr +* @param [out] priv: address of its user_data +* @param [out] size: length of its user_data +* @return RT_ERROR_NONE for ok +*/ +RTS_API rtError_t rtMbufGetPrivInfo (rtMbufPtr_t mbuf, void **priv, uint64_t *size); + +// mem group +typedef struct { + uint64_t maxMemSize; // max buf size in grp, in KB. = 0 means no limit +} rtMemGrpConfig_t; + +typedef struct { + uint32_t admin : 1; // admin permission, can add other proc to grp + uint32_t read : 1; // read only permission + uint32_t write : 1; // read and write permission + uint32_t alloc : 1; // alloc permission (have read and write permission) + uint32_t rsv : 28; +} rtMemGrpShareAttr_t; + +#define RT_MEM_GRP_QUERY_GROUPS_OF_PROCESS 1 // query process all grp + +typedef struct { + int32_t pid; +} rtMemGrpQueryByProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS + +typedef union { + rtMemGrpQueryByProc_t grpQueryByProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS +} rtMemGrpQueryInput_t; + +#define RT_MEM_GRP_NAME_LEN 32 // it must be same as driver define BUFF_GRP_NAME_LEN + +typedef struct { + char groupName[RT_MEM_GRP_NAME_LEN]; // group name + rtMemGrpShareAttr_t attr; // process in group attribute +} rtMemGrpOfProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS + +typedef struct { + rtMemGrpOfProc_t *groupsOfProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS + size_t maxNum; // max number of result + size_t resultNum; // if the number of results exceeds 'maxNum', only 'maxNum' results are filled in buffer +} rtMemGrpQueryOutput_t; + +/** +* @ingroup rt_mem_queue +* @brief create mem group +* @attention null +* @param [in] name, group name +* @param [in] cfg, group cfg +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtMemGrpCreate(const char *name, const rtMemGrpConfig_t *cfg); + +/** +* @ingroup rt_mem_queue +* @brief add process to group +* @param [in] name, group name +* @param [in] pid, process id +* @param [in] attr, process permission in group +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtMemGrpAddProc(const char *name, int32_t pid, const rtMemGrpShareAttr_t *attr); + +/** +* @ingroup rt_mem_queue +* @brief attach proccess to check permission in group +* @param [in] name, group name +* @param [in] timeout, time out ms +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtMemGrpAttach(const char *name, int32_t timeout); + +/** +* @ingroup rt_mem_queue +* @brief buff group query +* @param [in] cmd, cmd type +* @param [in] input, query input +* @param [in|out] output, query output +* @return 0 for success, others for fail +*/ +RTS_API rtError_t rtMemGrpQuery(int32_t cmd, const rtMemGrpQueryInput_t *input, rtMemGrpQueryOutput_t *output); + +#if defined(__cplusplus) +} +#endif +#endif // CCE_RUNTIME_RT_MEM_QUEUE_H diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index d0ffe9c8..d330fe3e 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -14,8 +14,8 @@ * limitations under the License. 
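A hedged end-to-end sketch of the mbuf/queue interface declared in rt_mem_queue.h above: create a queue, move one mbuf through it, and tear it down. The device id, queue name, buffer size, and the assumption that rtMbufInit has already configured the buffer pools are illustrative:

    #include <string.h>

    int32_t devId = 0;                        /* logical device id (assumed) */
    uint32_t qid = 0U;
    rtMemQueueAttr_t attr = {0};
    (void)strncpy(attr.name, "demo_q", RT_MQ_MAX_NAME_LEN - 1);
    attr.depth = RT_MQ_DEPTH_MIN;
    attr.workMode = RT_MQ_MODE_DEFAULT;

    (void)rtMemQueueInit(devId);              /* per-device queue init */
    (void)rtMemQueueCreate(devId, &attr, &qid);

    rtMbufPtr_t mbuf = NULL;
    void *data = NULL;
    (void)rtMbufAlloc(&mbuf, 1024U);          /* assumes a prior rtMbufInit(cfg) */
    (void)rtMbufGetBuffAddr(mbuf, &data);     /* producer fills *data here */
    (void)rtMemQueueEnQueue(devId, qid, mbuf);

    void *out = NULL;
    (void)rtMemQueueDeQueue(devId, qid, &out);
    (void)rtMbufFree((rtMbufPtr_t)out);
    (void)rtMemQueueDestroy(devId, qid);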
*/ -#ifndef __CCE_RUNTIME_MODEL_H__ -#define __CCE_RUNTIME_MODEL_H__ +#ifndef CCE_RUNTIME_RT_MODEL_H +#define CCE_RUNTIME_RT_MODEL_H #include "base.h" @@ -42,7 +42,7 @@ typedef enum tagModelTaskType { RT_MODEL_TASK_NOTIFY_WAIT, RT_MODEL_TASK_REDUCE_ASYNC, RT_MODEL_TASK_RDMA_SEND, - RT_MODEL_TASK_EVENT_RESET = 18, + RT_MODEL_TASK_EVENT_RESET, RT_MODEL_TASK_MODEL_END_GRAPH, RT_MODEL_TASK_STREAM_SWITCH_N, RT_MODEL_TASK_RDMA_DB_SEND, @@ -66,16 +66,16 @@ typedef enum tagModelQueueFlag { RT_MODEL_OUTPUT_QUEUE = 1 } rtModelQueueFlag_t; -#define EXECUTOR_NONE ((uint32_t)0x0) -#define EXECUTOR_TS ((uint32_t)0x01) -#define EXECUTOR_AICPU ((uint32_t)0x02) +#define EXECUTOR_NONE (0x0U) +#define EXECUTOR_TS (0x01U) +#define EXECUTOR_AICPU (0x02U) /* * @ingroup rt_model * @brief debug flag for kernel exception dump */ -#define RT_DEBUG_FLAG_AICORE_OVERFLOW (0x1 << 0) -#define RT_DEBUG_FLAG_ATOMIC_ADD_OVERFLOW (0x1 << 1) +#define RT_DEBUG_FLAG_AICORE_OVERFLOW (0x1U << 0U) +#define RT_DEBUG_FLAG_ATOMIC_ADD_OVERFLOW (0x1U << 1U) /** * @ingroup @@ -392,12 +392,12 @@ RTS_API rtError_t rtModelExecute(rtModel_t model, rtStream_t stream, uint32_t fl * @ingroup rt_model * @brief get model the last persist task id * @param [in] model model to execute - * @param [out] taskid last task id of the model - * @param [out] streamid last steam id of the model + * @param [out] taskId last task id of the model + * @param [out] streamId last steam id of the model * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtModelGetTaskId(rtModel_t model, uint32_t *taskid, uint32_t *streamid); +RTS_API rtError_t rtModelGetTaskId(rtModel_t model, uint32_t *taskId, uint32_t *streamId); /** * @ingroup rt_model @@ -495,4 +495,4 @@ RTS_API rtError_t rtDebugUnRegister(rtModel_t model); } #endif -#endif // __CCE_RUNTIME_MODEL_H__ +#endif // CCE_RUNTIME_RT_MODEL_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_stars.h b/third_party/fwkacllib/inc/runtime/rt_stars.h index 016c352a..12b836e2 100644 --- a/third_party/fwkacllib/inc/runtime/rt_stars.h +++ b/third_party/fwkacllib/inc/runtime/rt_stars.h @@ -1,10 +1,10 @@ /* * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. 
- * Description: + * Description: the definition of stars */ -#ifndef __CCE_RUNTIME_STARS_H -#define __CCE_RUNTIME_STARS_H +#ifndef CCE_RUNTIME_RT_STARS_H +#define CCE_RUNTIME_RT_STARS_H #include "base.h" @@ -84,4 +84,4 @@ RTS_API rtError_t rtCdqEnQueuePtrMode(const char *queName, uint32_t cdqeIndex, c } #endif -#endif // __CCE_RUNTIME_STARS_H +#endif // CCE_RUNTIME_RT_STARS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt_stars_define.h b/third_party/fwkacllib/inc/runtime/rt_stars_define.h index d77a8a8e..208f7aa6 100644 --- a/third_party/fwkacllib/inc/runtime/rt_stars_define.h +++ b/third_party/fwkacllib/inc/runtime/rt_stars_define.h @@ -3,8 +3,8 @@ * Description: the definition of stars */ -#ifndef __CCE_RUNTIME_STARS_DEFINE__H -#define __CCE_RUNTIME_STARS_DEFINE__H +#ifndef CCE_RUNTIME_RT_STARS_DEFINE_H +#define CCE_RUNTIME_RT_STARS_DEFINE_H #include "base.h" @@ -88,4 +88,4 @@ typedef struct tagFftsPlusSqe { #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif -#endif // __CCE_RUNTIME_STARS_DEFINE__H \ No newline at end of file +#endif // CCE_RUNTIME_RT_STARS_DEFINE_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index 3a078e99..c783b892 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __CCE_RUNTIME_STREAM_H__ -#define __CCE_RUNTIME_STREAM_H__ +#ifndef CCE_RUNTIME_STREAM_H +#define CCE_RUNTIME_STREAM_H #include "base.h" #include "event.h" @@ -28,27 +28,27 @@ extern "C" { * @ingroup stream_flags * @brief stream op bit flags */ -#define RT_STREAM_DEFAULT (0x00) -#define RT_STREAM_PERSISTENT (0x01) -#define RT_STREAM_FORCE_COPY (0x02) -#define RT_STREAM_HUGE (0x04) -#define RT_STREAM_AICPU (0x08) -#define RT_STREAM_FORBIDDEN_DEFAULT (0x10) -#define RT_STREAM_HEAD (0x20) -#define RT_STREAM_PRIMARY_DEFAULT (0x40) -#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80) +#define RT_STREAM_DEFAULT (0x00U) +#define RT_STREAM_PERSISTENT (0x01U) +#define RT_STREAM_FORCE_COPY (0x02U) +#define RT_STREAM_HUGE (0x04U) +#define RT_STREAM_AICPU (0x08U) +#define RT_STREAM_FORBIDDEN_DEFAULT (0x10U) +#define RT_STREAM_HEAD (0x20U) +#define RT_STREAM_PRIMARY_DEFAULT (0x40U) +#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80U) /** * @ingroup stream_type * @brief stream type */ -#define RT_NORMAL_STREAM (0x00) -#define RT_HUGE_STREAM (0x01) +#define RT_NORMAL_STREAM (0x00U) +#define RT_HUGE_STREAM (0x01U) /** * priority level default value when create a stream */ -#define RT_STREAM_PRIORITY_DEFAULT (0) +#define RT_STREAM_PRIORITY_DEFAULT (0U) /** * @ingroup dvrt_stream @@ -215,4 +215,4 @@ RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stream); } #endif -#endif // __CCE_RUNTIME_STREAM_H__ +#endif // CCE_RUNTIME_STREAM_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h index d65aac83..09a35c5d 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h +++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h @@ -22,18 +22,7 @@ #define PROF_TASK_TIME 0x00000002 #define PROF_AICORE_METRICS 0x00000004 #define PROF_AICPU_TRACE 0x00000008 -#define PROF_MODEL_EXECUTE 0x00000010 -#define PROF_RUNTIME_API 0x00000020 -#define PROF_RUNTIME_TRACE 0x00000040 -#define PROF_SCHEDULE_TIMELINE 0x00000080 -#define PROF_SCHEDULE_TRACE 0x00000100 -#define PROF_AIVECTORCORE_METRICS 0x00000200 
-#define PROF_SUBTASK_TIME 0x00000400 - -#define PROF_TRAINING_TRACE 0x00000800 -#define PROF_HCCL_TRACE 0x00001000 - -#define PROF_TASK_TRACE 0x00001852 +#define PROF_L2CACHE 0x00000010 // system profilinig switch #define PROF_CPU 0x00010000 @@ -44,6 +33,19 @@ #define PROF_SYS_AICORE_SAMPLE 0x00200000 #define PROF_AIVECTORCORE_SAMPLE 0x00400000 +#define PROF_MODEL_EXECUTE 0x0000001000000 +#define PROF_RUNTIME_API 0x0000002000000 +#define PROF_RUNTIME_TRACE 0x0000004000000 +#define PROF_SCHEDULE_TIMELINE 0x0000008000000 +#define PROF_SCHEDULE_TRACE 0x0000010000000 +#define PROF_AIVECTORCORE_METRICS 0x0000020000000 +#define PROF_SUBTASK_TIME 0x0000040000000 + +#define PROF_TRAINING_TRACE 0x0000080000000 +#define PROF_HCCL_TRACE 0x0000100000000 + +#define PROF_TASK_TRACE 0x0000185000002 + #define PROF_MODEL_LOAD 0x8000000000000000 // DataTypeConfig MASK @@ -51,16 +53,7 @@ #define PROF_TASK_TIME_MASK 0x00000002 #define PROF_AICORE_METRICS_MASK 0x00000004 #define PROF_AICPU_TRACE_MASK 0x00000008 -#define PROF_MODEL_EXECUTE_MASK 0x00000010 -#define PROF_RUNTIME_API_MASK 0x00000020 -#define PROF_RUNTIME_TRACE_MASK 0x00000040 -#define PROF_SCHEDULE_TIMELINE_MASK 0x00000080 -#define PROF_SCHEDULE_TRACE_MASK 0x00000100 -#define PROF_AIVECTORCORE_METRICS_MASK 0x00000200 -#define PROF_SUBTASK_TIME_MASK 0x00000400 - -#define PROF_TRAINING_TRACE_MASK 0x00000800 -#define PROF_HCCL_TRACE_MASK 0x00001000 +#define PROF_L2CACHE_MASK 0x00000010 // system profilinig mask #define PROF_CPU_MASK 0x00010000 @@ -71,6 +64,17 @@ #define PROF_SYS_AICORE_SAMPLE_MASK 0x00200000 #define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000 +#define PROF_MODEL_EXECUTE_MASK 0x0000001000000 +#define PROF_RUNTIME_API_MASK 0x0000002000000 +#define PROF_RUNTIME_TRACE_MASK 0x0000004000000 +#define PROF_SCHEDULE_TIMELINE_MASK 0x0000008000000 +#define PROF_SCHEDULE_TRACE_MASK 0x0000010000000 +#define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000 +#define PROF_SUBTASK_TIME_MASK 0x0000040000000 + +#define PROF_TRAINING_TRACE_MASK 0x0000080000000 +#define PROF_HCCL_TRACE_MASK 0x0000100000000 + #define PROF_MODEL_LOAD_MASK 0x8000000000000000 #ifndef OS_TYPE
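For reference, the widened feature bits above are still OR-ed into a single 64-bit DataTypeConfig; the PROF_TASK_TRACE preset (0x0000185000002) corresponds to PROF_TASK_TIME | PROF_MODEL_EXECUTE | PROF_RUNTIME_TRACE | PROF_TRAINING_TRACE | PROF_HCCL_TRACE under the new layout. A minimal sketch (the variable name is illustrative):

    uint64_t dataTypeConfig = PROF_TASK_TIME | PROF_AICORE_METRICS | PROF_RUNTIME_TRACE;
    /* presets such as PROF_TASK_TRACE can also be used directly */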