SHA1 | Message | Date
---|---|---
9bce7edf7f | !2089 upgrade ascend 1012 (Merge pull request !2089 from yanghaoran/r1.5) | 3 years ago
3b517344f0 | upgrade ascend 1012 | 3 years ago
854631a7b5 | !2088 upgrade ascend 1009 (Merge pull request !2088 from yanghaoran/r1.5) | 3 years ago
051ef7a385 | upgrade ascend 1009 | 3 years ago
d86bc5beb4 | !2087 upgrade ascend 0928 (Merge pull request !2087 from yanghaoran/r1.5) | 3 years ago
7f0b65e699 | upgrade ascend 0928 | 3 years ago
ecf9e8eec7 | !2086 upgrade ascend 0925 (Merge pull request !2086 from yanghaoran/r1.5) | 3 years ago
5e3b7eb856 | upgrade ascend 0925 | 3 years ago
15116d1c0c | !2085 upgarde ascend 0916 (Merge pull request !2085 from yanghaoran/r1.5) | 3 years ago
c28af94028 | upgarde ascend 0916 | 3 years ago
@@ -34,7 +34,6 @@ void GeModel::Init() { | |||
} | |||
GeModel::GeModel() { | |||
attrs_.InitDefault(); | |||
Init(); | |||
} | |||
@@ -78,12 +77,12 @@ void GeModel::SetPlatformVersion(const std::string &platform_version) { this->pl | |||
void GeModel::SetPlatformType(uint8_t platform_type) { this->platform_type_ = platform_type; } | |||
void GeModel::SetAttr(const ProtoAttrMapHelper &attrs) { attrs_ = attrs; } | |||
void GeModel::SetAttr(const ProtoAttrMap &attrs) { attrs_ = attrs; } | |||
ProtoAttrMapHelper GeModel::MutableAttrMap() { return attrs_; } | |||
ProtoAttrMap &GeModel::MutableAttrMap() { return attrs_; } | |||
ConstProtoAttrMapHelper GeModel::GetAttrMap() const { | |||
return ConstProtoAttrMapHelper(attrs_.GetProtoOwner(), attrs_.GetProtoMsg()); | |||
ConstProtoAttrMap &GeModel::GetAttrMap() const { | |||
return attrs_; | |||
} | |||
Status GeModel::GetSessionId(uint32_t model_id, uint64_t &session_id) const { | |||
@@ -17,10 +17,12 @@ | |||
#ifndef GE_MODEL_GE_MODEL_H_ | |||
#define GE_MODEL_GE_MODEL_H_ | |||
#include <securec.h> | |||
#include <map> | |||
#include <memory> | |||
#include <string> | |||
#include "securec.h" | |||
#include "runtime/rt.h" | |||
#include "common/tbe_kernel_store.h" | |||
#include "common/cust_aicpu_kernel_store.h" | |||
#include "framework/common/debug/log.h" | |||
@@ -60,9 +62,9 @@ class GeModel : public AttrHolder { | |||
void SetPlatformVersion(const std::string &platform_version); | |||
void SetPlatformType(uint8_t platform_type); | |||
void SetAttr(const ProtoAttrMapHelper &attrs); | |||
void SetAttr(const ProtoAttrMap &attrs); | |||
ProtoAttrMapHelper MutableAttrMap() override; | |||
ProtoAttrMap &MutableAttrMap() override; | |||
using AttrHolder::SetAttr; | |||
using AttrHolder::GetAllAttrs; | |||
@@ -77,12 +79,12 @@ class GeModel : public AttrHolder { | |||
} | |||
protected: | |||
ConstProtoAttrMapHelper GetAttrMap() const override; | |||
ConstProtoAttrMap &GetAttrMap() const override; | |||
private: | |||
void Init(); | |||
ProtoAttrMapHelper attrs_; /*lint !e148*/ | |||
ProtoAttrMap attrs_; /*lint !e148*/ | |||
Graph graph_; | |||
std::shared_ptr<domi::ModelTaskDef> task_; /*lint !e148*/ | |||
@@ -5,5 +5,6 @@ approvers: | |||
reviewers: | |||
- justin_zhao | |||
- zhangyongfeng88 | |||
- w00267184 | |||
options: | |||
no_parent_owners: true |
@@ -869,7 +869,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet, | |||
* | |||
* @see aclmdlCreateAIPP | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0, | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t cscSwitch, int16_t cscMatrixR0C0, | |||
int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0, | |||
int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0, | |||
int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, | |||
@@ -1106,7 +1106,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a | |||
* | |||
* @param modelId [IN] model id | |||
* @param index [IN] index of tensor | |||
* @param aippinfo [OUT] Pointer for static aipp info | |||
* @param aippInfo [OUT] Pointer for static aipp info | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval ACL_ERROR_MODEL_AIPP_NOT_EXIST The tensor of index is not configured with aipp | |||
@@ -1115,7 +1115,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a | |||
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | |||
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo); | |||
ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippInfo); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -541,7 +541,7 @@ ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event); | |||
* | |||
* @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end); | |||
ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent startEvent, aclrtEvent endEvent); | |||
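A minimal usage sketch for the renamed parameters above, assuming the usual ACL event workflow (aclrtCreateEvent, aclrtRecordEvent, aclrtSynchronizeEvent, aclrtDestroyEvent) and an already-created stream; it is not taken from this diff, and error handling is collapsed to casts.

```cpp
#include "acl/acl_rt.h"

// Sketch: time the work submitted to `stream` between two events.
float ElapsedMsExample(aclrtStream stream) {
  aclrtEvent startEvent = nullptr;
  aclrtEvent endEvent = nullptr;
  (void)aclrtCreateEvent(&startEvent);
  (void)aclrtCreateEvent(&endEvent);

  (void)aclrtRecordEvent(startEvent, stream);
  // ... launch the kernels / memcpys to be timed on `stream` ...
  (void)aclrtRecordEvent(endEvent, stream);
  (void)aclrtSynchronizeEvent(endEvent);

  float ms = 0.0F;
  (void)aclrtEventElapsedTime(&ms, startEvent, endEvent);  // elapsed time in milliseconds

  (void)aclrtDestroyEvent(startEvent);
  (void)aclrtDestroyEvent(endEvent);
  return ms;
}
```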
/** | |||
* @ingroup AscendCL | |||
@@ -733,6 +733,43 @@ ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const v | |||
/** | |||
* @ingroup AscendCL | |||
* @brief synchronous memory replication of two-dimensional matrix between host and device | |||
* | |||
* @param dst [IN] destination address pointer | |||
* @param dpitch [IN] pitch of destination memory | |||
* @param src [IN] source address pointer | |||
* @param spitch [IN] pitch of source memory | |||
* @param width [IN] width of matrix transfer | |||
* @param height [IN] height of matrix transfer | |||
* @param kind [IN] memcpy type | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclrtMemcpy2d(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, | |||
size_t height, aclrtMemcpyKind kind); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief asynchronous memory replication of two-dimensional matrix between host and device | |||
* | |||
* @param dst [IN] destination address pointer | |||
* @param dpitch [IN] pitch of destination memory | |||
* @param src [IN] source address pointer | |||
* @param spitch [IN] pitch of source memory | |||
* @param width [IN] width of matrix transfer | |||
* @param height [IN] height of matrix transfer | |||
* @param kind [IN] memcpy type | |||
* @param stream [IN] asynchronous task stream | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclrtMemcpy2dAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, | |||
size_t height, aclrtMemcpyKind kind, aclrtStream stream); | |||
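A minimal sketch of the two new 2-D copy entry points, not taken from this diff: it assumes the caller already allocated a pitched host buffer and a pitched device buffer, that pitches and width are in bytes as the doc comments describe, and that `stream` is a valid aclrtStream.

```cpp
#include "acl/acl_rt.h"

// Sketch: copy a height x width-byte sub-matrix host -> device, first
// synchronously, then asynchronously on a stream.
aclError Copy2dExample(void *devBuf, size_t dpitch, const void *hostBuf, size_t spitch,
                       size_t width, size_t height, aclrtStream stream) {
  aclError ret = aclrtMemcpy2d(devBuf, dpitch, hostBuf, spitch, width, height,
                               ACL_MEMCPY_HOST_TO_DEVICE);
  if (ret != ACL_SUCCESS) {
    return ret;
  }
  // Asynchronous variant: the copy is ordered on `stream` and must be fenced
  // before the destination is read.
  ret = aclrtMemcpy2dAsync(devBuf, dpitch, hostBuf, spitch, width, height,
                           ACL_MEMCPY_HOST_TO_DEVICE, stream);
  if (ret == ACL_SUCCESS) {
    ret = aclrtSynchronizeStream(stream);
  }
  return ret;
}
```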
/** | |||
* @ingroup AscendCL | |||
* @brief Asynchronous initialize memory | |||
* and set contents of memory to specified value async | |||
* | |||
@@ -0,0 +1,426 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_ | |||
#define INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_ | |||
#include "acl/acl_base.h" | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
#define ACL_TDT_QUEUE_PERMISSION_MANAGE 1 | |||
#define ACL_TDT_QUEUE_PERMISSION_DEQUEUE 2 | |||
#define ACL_TDT_QUEUE_PERMISSION_ENQUEUE 4 | |||
typedef void *acltdtBuf; | |||
typedef struct tagMemQueueAttr acltdtQueueAttr; | |||
typedef struct acltdtQueueRouteList acltdtQueueRouteList; | |||
typedef struct acltdtQueueRouteQueryInfo acltdtQueueRouteQueryInfo; | |||
typedef struct acltdtQueueRoute acltdtQueueRoute; | |||
typedef enum { ACL_TDT_QUEUE_NAME_PTR = 0, ACL_TDT_QUEUE_DEPTH_UINT32 } acltdtQueueAttrType; | |||
typedef enum { | |||
ACL_TDT_QUEUE_ROUTE_SRC_UINT32 = 0, | |||
ACL_TDT_QUEUE_ROUTE_DST_UINT32, | |||
ACL_TDT_QUEUE_ROUTE_STATUS_INT32 | |||
} acltdtQueueRouteParamType; | |||
typedef enum { | |||
ACL_TDT_QUEUE_ROUTE_QUERY_SRC = 0, | |||
ACL_TDT_QUEUE_ROUTE_QUERY_DST, | |||
ACL_TDT_QUEUE_ROUTE_QUERY_SRC_AND_DST | |||
} acltdtQueueRouteQueryMode; | |||
typedef enum { | |||
ACL_TDT_QUEUE_ROUTE_QUERY_MODE_ENUM = 0, | |||
ACL_TDT_QUEUE_ROUTE_QUERY_SRC_ID_UINT32, | |||
ACL_TDT_QUEUE_ROUTE_QUERY_DST_ID_UINT32 | |||
} acltdtQueueRouteQueryInfoParamType; | |||
/** | |||
* @ingroup AscendCL | |||
* @brief create queue | |||
* | |||
* @param attr [IN] pointer to the queue attr | |||
* @param qid [OUT] pointer to the qid | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see acltdtDestroyQueue | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtCreateQueue(const acltdtQueueAttr *attr, uint32_t *qid); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief destroy queue | |||
* | |||
* @param qid [IN] qid which to be destroyed | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see acltdtCreateQueue | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtDestroyQueue(uint32_t qid); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief enqueue function | |||
* | |||
* @param qid [IN] qid | |||
* @param buf [IN] acltdtBuf | |||
* @param timeout [IN] timeout | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see acltdtDequeue | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtEnqueue(uint32_t qid, acltdtBuf buf, int32_t timeout); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief dequeue function | |||
* | |||
* @param qid [IN] qid | |||
* @param buf [OUT] pointer to the acltdtBuf | |||
* @param timeout [IN] timeout | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see acltdtEnqueue | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtDequeue(uint32_t qid, acltdtBuf *buf, int32_t timeout); | |||
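A minimal round-trip sketch for the queue APIs declared in this new header, not taken from the diff: the attribute encoding (queue name passed by address of the pointer) and the millisecond timeout unit are assumptions, the attribute values are examples, and ACL/device initialization is omitted.

```cpp
#include "acl/acl_tdt_queue.h"

// Sketch: create a queue, push one buffer, pop it back, clean up.
aclError QueueRoundTripExample() {
  acltdtQueueAttr *attr = acltdtCreateQueueAttr();
  const char *name = "demo_queue";  // example name
  uint32_t depth = 8U;              // example depth
  (void)acltdtSetQueueAttr(attr, ACL_TDT_QUEUE_NAME_PTR, sizeof(name), &name);
  (void)acltdtSetQueueAttr(attr, ACL_TDT_QUEUE_DEPTH_UINT32, sizeof(depth), &depth);

  uint32_t qid = 0U;
  (void)acltdtCreateQueue(attr, &qid);

  acltdtBuf buf = nullptr;
  (void)acltdtAllocBuf(256U, &buf);     // arbitrary 256-byte payload
  (void)acltdtEnqueue(qid, buf, 1000);  // timeout assumed to be in ms

  acltdtBuf out = nullptr;
  const aclError ret = acltdtDequeue(qid, &out, 1000);
  if (ret == ACL_SUCCESS) {
    void *data = nullptr;
    size_t size = 0U;
    (void)acltdtGetBufData(out, &data, &size);
    // ... consume `size` bytes at `data` ...
    (void)acltdtFreeBuf(out);
  }
  (void)acltdtDestroyQueue(qid);
  (void)acltdtDestroyQueueAttr(attr);
  return ret;
}
```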
/** | |||
* @ingroup AscendCL | |||
* @brief grant queue to other process | |||
* | |||
* @param qid [IN] qid | |||
* @param pid [IN] pid of dst process | |||
* @param permission [IN] permission of queue | |||
* @param timeout [IN] timeout | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see ACL_TDT_QUEUE_PERMISSION_MANAGE | ACL_TDT_QUEUE_PERMISSION_DEQUEUE | ACL_TDT_QUEUE_PERMISSION_ENQUEUE | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtGrantQueue(uint32_t qid, int32_t pid, uint32_t permission, int32_t timeout); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief attach queue in current process | |||
* | |||
* @param qid [IN] qid | |||
* @param timeout [IN] timeout | |||
* @param permission [OUT] permission of queue | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see acltdtGrantQueue | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtAttachQueue(uint32_t qid, int32_t timeout, uint32_t *permission); | |||
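A short sketch of the grant/attach pair, not from the diff: `peerPid` is a placeholder for a process id obtained out of band, the timeout unit is assumed to be milliseconds, and in practice the grant and the attach run in different processes (shown here in one function for brevity).

```cpp
#include "acl/acl_tdt_queue.h"

// Sketch: the owner grants dequeue permission; the peer attaches and checks it.
bool ShareQueueExample(uint32_t qid, int32_t peerPid) {
  if (acltdtGrantQueue(qid, peerPid, ACL_TDT_QUEUE_PERMISSION_DEQUEUE, 1000) != ACL_SUCCESS) {
    return false;
  }
  // In the peer process:
  uint32_t permission = 0U;
  return (acltdtAttachQueue(qid, 1000, &permission) == ACL_SUCCESS) &&
         ((permission & ACL_TDT_QUEUE_PERMISSION_DEQUEUE) != 0U);
}
```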
/** | |||
* @ingroup AscendCL | |||
* @brief bind queue routes | |||
* | |||
* @param qRouteList [IN|OUT] pointer to the route list | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtBindQueueRoutes(acltdtQueueRouteList *qRouteList); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief unbind queue routes | |||
* | |||
* @param qRouteList [IN|OUT] pointer to the route list | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtUnbindQueueRoutes(acltdtQueueRouteList *qRouteList); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief query queue routes according to query mode | |||
* | |||
* @param queryInfo [IN] pointer to the queue route query info | |||
* @param qRouteList [IN|OUT] pointer to the route list | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtQueryQueueRoutes(const acltdtQueueRouteQueryInfo *queryInfo, | |||
acltdtQueueRouteList *qRouteList); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief alloc acltdtBuf | |||
* | |||
* @param size [IN] alloc buf size | |||
* @param buf [OUT] pointer to the acltdtBuf | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see acltdtFreeBuf | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtAllocBuf(size_t size, acltdtBuf *buf); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief free acltdtBuf | |||
* | |||
* @param buf [IN] pointer to the acltdtBuf | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see acltdtAllocBuf | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtFreeBuf(acltdtBuf buf); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief get data buf address | |||
* | |||
* @param buf [IN] acltdtBuf | |||
* @param dataPtr [OUT] pointer to the data ptr which is acquired from acltdtBuf | |||
* @param size [OUT] pointer to the size | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see acltdtAllocBuf | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtGetBufData(const acltdtBuf buf, void **dataPtr, size_t *size); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Create the queue attr | |||
* | |||
* @retval null for failed | |||
* @retval OtherValues success | |||
* | |||
* @see acltdtDestroyQueueAttr | |||
*/ | |||
ACL_FUNC_VISIBILITY acltdtQueueAttr *acltdtCreateQueueAttr(); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Destroy the queue attr | |||
* | |||
* @param attr [IN] pointer to the queue attr | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see acltdtCreateQueueAttr | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueAttr(const acltdtQueueAttr *attr); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Set parameter for queue attr | |||
* | |||
* @param attr [IN|OUT] pointer to the queue attr | |||
* @param type [IN] parameter type | |||
* @param len [IN] parameter length | |||
* @param param [IN] pointer to parameter value | |||
* | |||
* @retval ACL_SUCCESS for success, other for failure | |||
* | |||
* @see acltdtCreateQueueAttr | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtSetQueueAttr(acltdtQueueAttr *attr, acltdtQueueAttrType type, size_t len, | |||
const void *param); | |||
/** | |||
* @ingroup AscendCL | |||
* | |||
* @brief Get parameter for queue attr. | |||
* | |||
* @param attr [IN] pointer to the queue attr | |||
* @param type [IN] parameter type | |||
* @param len [IN] parameter length | |||
* @param paramRetSize [OUT] pointer to parameter real length | |||
* @param param [OUT] pointer to parameter value | |||
* | |||
* @retval ACL_SUCCESS for success, other for failure | |||
* | |||
* @see acltdtCreateQueueAttr | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtGetQueueAttr(const acltdtQueueAttr *attr, acltdtQueueAttrType type, size_t len, | |||
size_t *paramRetSize, void *param); | |||
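A short sketch of the getter, not from the diff, continuing from an attr object created with acltdtCreateQueueAttr; `paramRetSize` reports the real length of the returned parameter.

```cpp
#include "acl/acl_tdt_queue.h"

// Sketch: read the configured depth back out of an existing attr object.
uint32_t ReadQueueDepthExample(const acltdtQueueAttr *attr) {
  uint32_t depth = 0U;
  size_t retSize = 0U;
  (void)acltdtGetQueueAttr(attr, ACL_TDT_QUEUE_DEPTH_UINT32, sizeof(depth),
                           &retSize, &depth);
  return depth;
}
```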
/** | |||
* @ingroup AscendCL | |||
* @brief Create the queue route | |||
* | |||
* @param srcId [IN] src id of queue route | |||
* @param dstId [IN] dst id of queue route | |||
* | |||
* @retval null for failed | |||
* @retval OtherValues success | |||
* | |||
* @see acltdtDestroyQueueRoute | |||
*/ | |||
ACL_FUNC_VISIBILITY acltdtQueueRoute *acltdtCreateQueueRoute(uint32_t srcId, uint32_t dstId); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Destroy the queue attr | |||
* | |||
* @param route [IN] pointer to the queue route | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see acltdtCreateQueueRoute | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRoute(const acltdtQueueRoute *route); | |||
/** | |||
* @ingroup AscendCL | |||
* | |||
* @brief Get parameter for queue route. | |||
* | |||
* @param route [IN] pointer to the queue route | |||
* @param type [IN] parameter type | |||
* @param len [IN] parameter length | |||
* @param paramRetSize [OUT] pointer to parameter real length | |||
* @param param [OUT] pointer to parameter value | |||
* | |||
* @retval ACL_SUCCESS for success, other for failure | |||
* | |||
* @see acltdtCreateQueueRoute | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtGetQueueRouteParam(const acltdtQueueRoute *route, acltdtQueueRouteParamType type, | |||
size_t len, size_t *paramRetSize, void *param); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Create the queue route list | |||
* | |||
* @retval null for failed | |||
* @retval OtherValues success | |||
* | |||
* @see acltdtDestroyQueueRouteList | |||
*/ | |||
ACL_FUNC_VISIBILITY acltdtQueueRouteList *acltdtCreateQueueRouteList(); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Destroy the queue route list | |||
* | |||
* @param routeList [IN] pointer to the queue route list | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see acltdtCreateQueueRouteList | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRouteList(const acltdtQueueRouteList *routeList); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief add queue route to the route list | |||
* | |||
* @param routeList [IN|OUT] pointer to the queue route list | |||
* @param route [IN] pointer to the queue route | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see acltdtCreateQueueRouteList | acltdtCreateQueueRoute | |||
* | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtAddQueueRoute(acltdtQueueRouteList *routeList, const acltdtQueueRoute *route); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief get queue route from route list | |||
* | |||
* @param routeList [IN] pointer to the queue route list | |||
* @param index [IN] index of queue route in route list | |||
* @param route [IN|OUT] pointer to the queue route | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see acltdtCreateQueueRouteList | acltdtCreateQueueRoute | |||
* | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtGetQueueRoute(const acltdtQueueRouteList *routeList, size_t index, | |||
acltdtQueueRoute *route); | |||
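A minimal sketch of the route-list workflow, not from the diff: both queue ids are assumed to come from acltdtCreateQueue, and error handling is reduced to casts.

```cpp
#include "acl/acl_tdt_queue.h"

// Sketch: bind a single route srcQid -> dstQid, read its status, then unbind.
aclError BindOneRouteExample(uint32_t srcQid, uint32_t dstQid) {
  acltdtQueueRouteList *routes = acltdtCreateQueueRouteList();
  acltdtQueueRoute *route = acltdtCreateQueueRoute(srcQid, dstQid);
  (void)acltdtAddQueueRoute(routes, route);

  const aclError ret = acltdtBindQueueRoutes(routes);
  if (ret == ACL_SUCCESS) {
    int32_t status = 0;
    size_t retSize = 0U;
    (void)acltdtGetQueueRouteParam(route, ACL_TDT_QUEUE_ROUTE_STATUS_INT32,
                                   sizeof(status), &retSize, &status);
    (void)acltdtUnbindQueueRoutes(routes);
  }
  (void)acltdtDestroyQueueRoute(route);
  (void)acltdtDestroyQueueRouteList(routes);
  return ret;
}
```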
/** | |||
* @ingroup AscendCL | |||
* @brief Create the queue route query info | |||
* | |||
* @retval null for failed | |||
* @retval OtherValues success | |||
* | |||
* @see acltdtDestroyQueueRouteQueryInfo | |||
*/ | |||
ACL_FUNC_VISIBILITY acltdtQueueRouteQueryInfo *acltdtCreateQueueRouteQueryInfo(); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Destroy the queue route query info | |||
* | |||
* @param info [IN] pointer to the queue route info | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see acltdtCreateQueueRouteQueryInfo | |||
* | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRouteQueryInfo(const acltdtQueueRouteQueryInfo *info); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Set parameter for queue route info | |||
* | |||
* @param attr [IN|OUT] pointer to the queue route info | |||
* @param type [IN] parameter type | |||
* @param len [IN] parameter length | |||
* @param param [IN] pointer to parameter value | |||
* | |||
* @retval ACL_SUCCESS for success, other for failure | |||
* | |||
* @see acltdtCreateQueueRouteQueryInfo | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtSetQueueRouteQueryInfo(acltdtQueueRouteQueryInfo *param, | |||
acltdtQueueRouteQueryInfoParamType type, size_t len, | |||
const void *value); | |||
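A short query sketch, not from the diff: setting the query mode through ACL_TDT_QUEUE_ROUTE_QUERY_MODE_ENUM and the source id through ACL_TDT_QUEUE_ROUTE_QUERY_SRC_ID_UINT32 is an assumption based on the parameter-type names above.

```cpp
#include "acl/acl_tdt_queue.h"

// Sketch: query every route whose source queue is `srcQid` into `found`.
aclError QueryRoutesBySrcExample(uint32_t srcQid, acltdtQueueRouteList *found) {
  acltdtQueueRouteQueryInfo *query = acltdtCreateQueueRouteQueryInfo();
  acltdtQueueRouteQueryMode mode = ACL_TDT_QUEUE_ROUTE_QUERY_SRC;
  (void)acltdtSetQueueRouteQueryInfo(query, ACL_TDT_QUEUE_ROUTE_QUERY_MODE_ENUM,
                                     sizeof(mode), &mode);
  (void)acltdtSetQueueRouteQueryInfo(query, ACL_TDT_QUEUE_ROUTE_QUERY_SRC_ID_UINT32,
                                     sizeof(srcQid), &srcQid);
  const aclError ret = acltdtQueryQueueRoutes(query, found);
  // ... walk `found` with acltdtGetQueueRoute(found, index, route) ...
  (void)acltdtDestroyQueueRouteQueryInfo(query);
  return ret;
}
```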
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_ |
@@ -56,6 +56,7 @@ static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event res | |||
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource | |||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||
@@ -125,7 +125,7 @@ enum acldvppPixelFormat { | |||
enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL }; | |||
// Supported Channel Mode | |||
enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 }; | |||
enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4, DVPP_CHNMODE_PNGD = 8 }; | |||
// Supported Border Type | |||
enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 }; | |||
@@ -72,17 +72,23 @@ class GE_FUNC_VISIBILITY StatusFactory { | |||
class GE_FUNC_VISIBILITY ErrorNoRegisterar { | |||
public: | |||
ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | |||
ErrorNoRegisterar(uint32_t err, const char *desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | |||
ErrorNoRegisterar(uint32_t err, const std::string &desc) { | |||
StatusFactory::Instance()->RegisterErrorNo(err, desc); | |||
} | |||
ErrorNoRegisterar(uint32_t err, const char *desc) { | |||
StatusFactory::Instance()->RegisterErrorNo(err, desc); | |||
} | |||
~ErrorNoRegisterar() {} | |||
}; | |||
// Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit | |||
#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ | |||
constexpr ge::Status name = \ | |||
((0xFF & (static_cast<uint8_t>(runtime))) << 30) | ((0xFF & (static_cast<uint8_t>(type))) << 28) | \ | |||
((0xFF & (static_cast<uint8_t>(level))) << 25) | ((0xFF & (static_cast<uint8_t>(sysid))) << 17) | \ | |||
((0xFF & (static_cast<uint8_t>(modid))) << 12) | (0x0FFF & (static_cast<uint16_t>(value))); \ | |||
#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ | |||
constexpr ge::Status name = (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(runtime))) << 30) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(type))) << 28) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(level))) << 25) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(sysid))) << 17) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(modid))) << 12) | \ | |||
(static_cast<uint32_t>(0x0FFFU) & (static_cast<uint32_t>(value))); \ | |||
const ErrorNoRegisterar g_##name##_errorno(name, desc); | |||
#define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_##name##_errorno(name, desc); | |||
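To make the bit layout in the comment above concrete, here is a worked decomposition of one value quoted later in this header (GE_ERRORNO_INIT(GE_MULTI_INIT, 0) is annotated as 1343234048, i.e. 0x50102000). The enumerator values used below (RT_HOST = 1, ERROR_CODE = 1, COMMON_LEVEL = 0, SYSID_GE = 8, INIT_MODULE = 2) are inferred from that number, not taken from the source.

```cpp
// 0x50102000 read with the layout runtime(2) | type(2) | level(3) | sysid(8) |
// modid(5) | value(12), matching the shifts 30 / 28 / 25 / 17 / 12 above:
//   runtime = 1 (bits 31..30)   type  = 1 (bits 29..28)   level = 0 (bits 27..25)
//   sysid   = 8 (bits 24..17)   modid = 2 (bits 16..12)   value = 0 (bits 11..0)
static_assert(((1U << 30) | (1U << 28) | (0U << 25) | (8U << 17) | (2U << 12) | 0U) ==
                  0x50102000U,
              "worked example of the GE_ERRORNO bit composition");
```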
@@ -67,6 +67,7 @@ const char *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOp | |||
const char *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput"; | |||
const char *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode"; | |||
const char *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; | |||
const char *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr"; | |||
// Option key: memory init | |||
const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; | |||
@@ -33,7 +33,7 @@ | |||
extern "C" { | |||
#endif | |||
#define GE_MODULE_NAME static_cast<int>(GE) | |||
#define GE_MODULE_NAME static_cast<int32_t>(GE) | |||
// trace status of log | |||
enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; | |||
@@ -51,43 +51,61 @@ class GE_FUNC_VISIBILITY GeLog { | |||
}; | |||
inline bool IsLogEnable(int module_name, int log_level) { | |||
int32_t enable = CheckLogLevel(module_name, log_level); | |||
const int32_t enable = CheckLogLevel(module_name, log_level); | |||
// 1:enable, 0:disable | |||
return (enable == 1); | |||
} | |||
#define GELOGE(ERROR_CODE, fmt, ...) \ | |||
dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %d(%s) %s" fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ | |||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ | |||
##__VA_ARGS__) | |||
#define GELOGW(fmt, ...) \ | |||
if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) \ | |||
dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | |||
#define GELOGI(fmt, ...) \ | |||
if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) \ | |||
dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | |||
#define GELOGD(fmt, ...) \ | |||
if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) \ | |||
dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | |||
#define GEEVENT(fmt, ...) dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | |||
#define GELOGT(VALUE, fmt, ...) \ | |||
do { \ | |||
TraceStatus stat = VALUE; \ | |||
const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ | |||
int idx = static_cast<int>(stat); \ | |||
char *k = const_cast<char *>("status"); \ | |||
char *v = const_cast<char *>(TraceStatStr[idx]); \ | |||
KeyValue kv = {k, v}; \ | |||
DlogWithKV(static_cast<int>(GE_MODULE_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, \ | |||
##__VA_ARGS__); \ | |||
} while (0) | |||
#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ | |||
dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) %s" fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ | |||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ | |||
##__VA_ARGS__) | |||
#define GELOGE(ERROR_CODE, fmt, ...) \ | |||
do { \ | |||
dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \ | |||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ | |||
##__VA_ARGS__); \ | |||
} while (false) | |||
#define GELOGW(fmt, ...) \ | |||
do { \ | |||
if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) { \ | |||
dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ | |||
} \ | |||
} while (false) | |||
#define GELOGI(fmt, ...) \ | |||
do { \ | |||
if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) { \ | |||
dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ | |||
} \ | |||
} while (false) | |||
#define GELOGD(fmt, ...) \ | |||
do { \ | |||
if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) { \ | |||
dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ | |||
} \ | |||
} while (false) | |||
#define GEEVENT(fmt, ...) \ | |||
do { \ | |||
dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ | |||
} while (false) | |||
#define GELOGT(VALUE, fmt, ...) \ | |||
do { \ | |||
TraceStatus stat = VALUE; \ | |||
const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ | |||
const int32_t idx = static_cast<int32_t>(stat); \ | |||
char *k = const_cast<char *>("status"); \ | |||
char *v = const_cast<char *>(TraceStatStr[idx]); \ | |||
KeyValue kv = {k, v}; \ | |||
DlogWithKV(GE_MODULE_NAME, DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \ | |||
} while (false) | |||
#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ | |||
do { \ | |||
dlog_error(MOD_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \ | |||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ | |||
##__VA_ARGS__); \ | |||
} while (false) | |||
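A side note on why the macros above are now wrapped in do { ... } while (false): this is the standard C/C++ idiom for making a multi-statement macro behave as one statement, not something stated in the diff. LoggingEnabled, Log, and HandleMiss below are hypothetical helpers used only to show how a trailing else binds.

```cpp
bool LoggingEnabled();
void Log(const char *msg);
void HandleMiss();

// Old-style expansion: the macro body is a bare `if`, so a following `else`
// attaches to the macro's hidden `if`, not to the caller's condition.
#define LOG_IF_ENABLED_BAD(msg) if (LoggingEnabled()) Log(msg)
// New-style expansion: one complete statement that consumes the trailing ';'.
#define LOG_IF_ENABLED_GOOD(msg) \
  do {                           \
    if (LoggingEnabled()) {      \
      Log(msg);                  \
    }                            \
  } while (false)

void DanglingElseExample(bool cond) {
  if (cond)
    LOG_IF_ENABLED_BAD("hit");
  else
    HandleMiss();  // binds to the hidden `if`: runs when cond is true but logging is off

  if (cond)
    LOG_IF_ENABLED_GOOD("hit");
  else
    HandleMiss();  // binds to `cond`, as the caller intended
}
```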
// print memory when it is greater than 1KB. | |||
#define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \ | |||
@@ -95,7 +113,7 @@ inline bool IsLogEnable(int module_name, int log_level) { | |||
if ((SIZE) > 1024) { \ | |||
GELOGI("MallocMemory, func=%s, size=%zu, purpose=%s", (#FUNC), static_cast<size_t>(SIZE), (PURPOSE)); \ | |||
} \ | |||
} while (0); | |||
} while (false) | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
@@ -52,82 +52,82 @@ | |||
GELOGW(__VA_ARGS__); \ | |||
} | |||
#define GE_LOGE_IF(condition, ...) \ | |||
if ((condition)) { \ | |||
DOMI_LOGE(__VA_ARGS__); \ | |||
#define GE_LOGE_IF(condition, ...) \ | |||
if ((condition)) { \ | |||
GELOGE(ge::FAILED, __VA_ARGS__); \ | |||
} | |||
// If expr is not SUCCESS, print the log and return the same value | |||
#define GE_CHK_STATUS_RET(expr, ...) \ | |||
do { \ | |||
const ge::Status _status = (expr); \ | |||
if (_status != ge::SUCCESS) { \ | |||
DOMI_LOGE(__VA_ARGS__); \ | |||
return _status; \ | |||
} \ | |||
} while (0); | |||
#define GE_CHK_STATUS_RET(expr, ...) \ | |||
do { \ | |||
const ge::Status _chk_status = (expr); \ | |||
if (_chk_status != ge::SUCCESS) { \ | |||
GELOGE(ge::FAILED, __VA_ARGS__); \ | |||
return _chk_status; \ | |||
} \ | |||
} while (false) | |||
// If expr is not SUCCESS, print the log and do not execute return | |||
#define GE_CHK_STATUS(expr, ...) \ | |||
do { \ | |||
const ge::Status _status = (expr); \ | |||
if (_status != ge::SUCCESS) { \ | |||
DOMI_LOGE(__VA_ARGS__); \ | |||
} \ | |||
} while (0); | |||
#define GE_CHK_STATUS(expr, ...) \ | |||
do { \ | |||
const ge::Status _chk_status = (expr); \ | |||
if (_chk_status != ge::SUCCESS) { \ | |||
GELOGE(ge::FAILED, __VA_ARGS__); \ | |||
} \ | |||
} while (false) | |||
// If expr is not SUCCESS, return the same value | |||
#define GE_CHK_STATUS_RET_NOLOG(expr) \ | |||
do { \ | |||
const ge::Status _status = (expr); \ | |||
if (_status != ge::SUCCESS) { \ | |||
return _status; \ | |||
} \ | |||
} while (0); | |||
#define GE_CHK_STATUS_RET_NOLOG(expr) \ | |||
do { \ | |||
const ge::Status _chk_status = (expr); \ | |||
if (_chk_status != ge::SUCCESS) { \ | |||
return _chk_status; \ | |||
} \ | |||
} while (false) | |||
// If expr is not GRAPH_SUCCESS, print the log and return FAILED | |||
#define GE_CHK_GRAPH_STATUS_RET(expr, ...) \ | |||
do { \ | |||
if ((expr) != ge::GRAPH_SUCCESS) { \ | |||
REPORT_CALL_ERROR("E19999", "Operator graph failed"); \ | |||
DOMI_LOGE(__VA_ARGS__); \ | |||
GELOGE(ge::FAILED, __VA_ARGS__); \ | |||
return FAILED; \ | |||
} \ | |||
} while (0); | |||
} while (false) | |||
// If expr is not SUCCESS, print the log and execute a custom statement | |||
#define GE_CHK_STATUS_EXEC(expr, exec_expr, ...) \ | |||
do { \ | |||
const ge::Status _status = (expr); \ | |||
GE_CHK_BOOL_EXEC(_status == SUCCESS, exec_expr, __VA_ARGS__); \ | |||
} while (0); | |||
#define GE_CHK_STATUS_EXEC(expr, exec_expr, ...) \ | |||
do { \ | |||
const ge::Status _chk_status = (expr); \ | |||
GE_CHK_BOOL_EXEC(_chk_status == SUCCESS, exec_expr, __VA_ARGS__); \ | |||
} while (false) | |||
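A minimal sketch of how these checkers are typically used inside a function returning ge::Status; InitResources, LoadModel, and WarmUpCache are hypothetical callees, not GE APIs, and the example is not part of this diff.

```cpp
ge::Status InitResources();
ge::Status LoadModel();
ge::Status WarmUpCache();

ge::Status PrepareExample() {
  GE_CHK_STATUS_RET(InitResources(), "init resources failed");  // log + return on failure
  GE_CHK_STATUS_RET_NOLOG(LoadModel());                         // propagate silently
  GE_CHK_STATUS(WarmUpCache(), "warm-up failed, continuing");   // log only, keep going
  return ge::SUCCESS;
}
```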
// If expr is not true, print the log and return the specified status | |||
#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \ | |||
do { \ | |||
bool b = (expr); \ | |||
const bool b = (expr); \ | |||
if (!b) { \ | |||
REPORT_INNER_ERROR("E19999", __VA_ARGS__); \ | |||
GELOGE(_status, __VA_ARGS__); \ | |||
return _status; \ | |||
} \ | |||
} while (0); | |||
} while (false) | |||
// If expr is not true, print the log and return the specified status | |||
#define GE_CHK_BOOL_RET_STATUS_NOLOG(expr, _status, ...) \ | |||
do { \ | |||
bool b = (expr); \ | |||
const bool b = (expr); \ | |||
if (!b) { \ | |||
return _status; \ | |||
} \ | |||
} while (0); | |||
} while (false) | |||
// If expr is not true, print the log and execute a custom statement | |||
#define GE_CHK_BOOL_EXEC(expr, exec_expr, ...) \ | |||
{ \ | |||
bool b = (expr); \ | |||
const bool b = (expr); \ | |||
if (!b) { \ | |||
DOMI_LOGE(__VA_ARGS__); \ | |||
GELOGE(ge::FAILED, __VA_ARGS__); \ | |||
exec_expr; \ | |||
} \ | |||
} | |||
@@ -135,7 +135,7 @@ | |||
// If expr is not true, print the log and execute a custom statement | |||
#define GE_CHK_BOOL_EXEC_WARN(expr, exec_expr, ...) \ | |||
{ \ | |||
bool b = (expr); \ | |||
const bool b = (expr); \ | |||
if (!b) { \ | |||
GELOGW(__VA_ARGS__); \ | |||
exec_expr; \ | |||
@@ -144,7 +144,7 @@ | |||
// If expr is not true, print the log and execute a custom statement | |||
#define GE_CHK_BOOL_EXEC_INFO(expr, exec_expr, ...) \ | |||
{ \ | |||
bool b = (expr); \ | |||
const bool b = (expr); \ | |||
if (!b) { \ | |||
GELOGI(__VA_ARGS__); \ | |||
exec_expr; \ | |||
@@ -154,7 +154,7 @@ | |||
// If expr is not true, print the log and execute a custom statement | |||
#define GE_CHK_BOOL_TRUE_EXEC_INFO(expr, exec_expr, ...) \ | |||
{ \ | |||
bool b = (expr); \ | |||
const bool b = (expr); \ | |||
if (b) { \ | |||
GELOGI(__VA_ARGS__); \ | |||
exec_expr; \ | |||
@@ -164,16 +164,16 @@ | |||
// If expr is true, print logs and execute custom statements | |||
#define GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(expr, exec_expr, ...) \ | |||
{ \ | |||
bool b = (expr); \ | |||
const bool b = (expr); \ | |||
if (b) { \ | |||
DOMI_LOGE(__VA_ARGS__); \ | |||
GELOGE(ge::FAILED, __VA_ARGS__); \ | |||
exec_expr; \ | |||
} \ | |||
} | |||
// If expr is true, print the Information log and execute a custom statement | |||
#define GE_CHK_TRUE_EXEC_INFO(expr, exec_expr, ...) \ | |||
{ \ | |||
bool b = (expr); \ | |||
const bool b = (expr); \ | |||
if (b) { \ | |||
GELOGI(__VA_ARGS__); \ | |||
exec_expr; \ | |||
@@ -183,9 +183,9 @@ | |||
// If expr is not SUCCESS, print the log and execute the expression + return | |||
#define GE_CHK_BOOL_TRUE_RET_VOID(expr, exec_expr, ...) \ | |||
{ \ | |||
bool b = (expr); \ | |||
const bool b = (expr); \ | |||
if (b) { \ | |||
DOMI_LOGE(__VA_ARGS__); \ | |||
GELOGE(ge::FAILED, __VA_ARGS__); \ | |||
exec_expr; \ | |||
return; \ | |||
} \ | |||
@@ -194,10 +194,10 @@ | |||
// If expr is not SUCCESS, print the log and execute the expression + return _status | |||
#define GE_CHK_BOOL_TRUE_EXEC_RET_STATUS(expr, _status, exec_expr, ...) \ | |||
{ \ | |||
bool b = (expr); \ | |||
const bool b = (expr); \ | |||
if (b) { \ | |||
REPORT_INNER_ERROR("E19999", __VA_ARGS__); \ | |||
DOMI_LOGE(__VA_ARGS__); \ | |||
GELOGE(ge::FAILED, __VA_ARGS__); \ | |||
exec_expr; \ | |||
return _status; \ | |||
} \ | |||
@@ -206,7 +206,7 @@ | |||
// If expr is not true, execute a custom statement | |||
#define GE_CHK_BOOL_EXEC_NOLOG(expr, exec_expr) \ | |||
{ \ | |||
bool b = (expr); \ | |||
const bool b = (expr); \ | |||
if (!b) { \ | |||
exec_expr; \ | |||
} \ | |||
@@ -214,34 +214,34 @@ | |||
// -----------------runtime related macro definitions------------------------------- | |||
// If expr is not RT_ERROR_NONE, print the log | |||
#define GE_CHK_RT(expr) \ | |||
do { \ | |||
rtError_t _rt_ret = (expr); \ | |||
if (_rt_ret != RT_ERROR_NONE) { \ | |||
DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ | |||
} \ | |||
} while (0); | |||
#define GE_CHK_RT(expr) \ | |||
do { \ | |||
const rtError_t _rt_ret = (expr); \ | |||
if (_rt_ret != RT_ERROR_NONE) { \ | |||
GELOGE(ge::FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ | |||
} \ | |||
} while (false) | |||
// If expr is not RT_ERROR_NONE, print the log and execute the exec_expr expression | |||
#define GE_CHK_RT_EXEC(expr, exec_expr) \ | |||
{ \ | |||
rtError_t _rt_ret = (expr); \ | |||
if (_rt_ret != RT_ERROR_NONE) { \ | |||
DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ | |||
exec_expr; \ | |||
} \ | |||
} | |||
#define GE_CHK_RT_EXEC(expr, exec_expr) \ | |||
do { \ | |||
const rtError_t _rt_ret = (expr); \ | |||
if (_rt_ret != RT_ERROR_NONE) { \ | |||
GELOGE(ge::FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ | |||
exec_expr; \ | |||
} \ | |||
} while (false) | |||
// If expr is not RT_ERROR_NONE, print the log and return | |||
#define GE_CHK_RT_RET(expr) \ | |||
do { \ | |||
rtError_t _rt_ret = (expr); \ | |||
const rtError_t _rt_ret = (expr); \ | |||
if (_rt_ret != RT_ERROR_NONE) { \ | |||
REPORT_CALL_ERROR("E19999", "Call %s fail, ret: 0x%X", #expr, _rt_ret); \ | |||
DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ | |||
GELOGE(ge::FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ | |||
return RT_ERROR_TO_GE_STATUS(_rt_ret); \ | |||
} \ | |||
} while (0); | |||
} while (false) | |||
// If expr is true, execute exec_expr without printing logs | |||
#define GE_IF_BOOL_EXEC(expr, exec_expr) \ | |||
@@ -256,7 +256,7 @@ | |||
try { \ | |||
exec_expr0; \ | |||
} catch (const std::bad_alloc &) { \ | |||
DOMI_LOGE("Make shared failed"); \ | |||
GELOGE(ge::FAILED, "Make shared failed"); \ | |||
exec_expr1; \ | |||
} | |||
@@ -274,13 +274,13 @@ | |||
#define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \ | |||
do { \ | |||
bool b = (expr); \ | |||
const bool b = (expr); \ | |||
if (!b) { \ | |||
GELOGE(_status, "%s", errormsg); \ | |||
ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {errormsg}); \ | |||
return _status; \ | |||
} \ | |||
} while (0) | |||
} while (false) | |||
template <typename T> | |||
GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) { | |||
@@ -61,29 +61,29 @@ enum ErrorLevel { | |||
CRITICAL_LEVEL = 0b100, | |||
}; | |||
// Each module defines error codes using the following macros | |||
// Each module defines error codes using the following macros, name can not be modified to (name) | |||
#define GE_ERRORNO_COMMON(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, COMMON_MODULE, name, value, desc) | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, COMMON_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_CLIENT(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, CLIENT_MODULE, name, value, desc) | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, CLIENT_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_INIT(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, INIT_MODULE, name, value, desc) | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, INIT_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_SESSION(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, SESSION_MODULE, name, value, desc) | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, SESSION_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_GRAPH(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GRAPH_MODULE, name, value, desc) | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GRAPH_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_ENGINE(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, ENGINE_MODULE, name, value, desc) | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, ENGINE_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_OPS(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, OPS_MODULE, name, value, desc) | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, OPS_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_PLUGIN(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, PLUGIN_MODULE, name, value, desc) | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, PLUGIN_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_RUNTIME(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, RUNTIME_MODULE, name, value, desc) | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, RUNTIME_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_EXECUTOR(name, value, desc) \ | |||
GE_ERRORNO(RT_DEVICE, ERROR_CODE, COMMON_LEVEL, SYSID_GE, EXECUTOR_MODULE, name, value, desc) | |||
GE_ERRORNO(RT_DEVICE, ERROR_CODE, COMMON_LEVEL, SYSID_GE, EXECUTOR_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_GENERATOR(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GENERATOR_MODULE, name, value, desc) | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GENERATOR_MODULE, name, (value), (desc)) | |||
// Get error code description | |||
#define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value) | |||
@@ -125,13 +125,13 @@ GE_ERRORNO_CLIENT(GE_CLI_GE_ALREADY_INITIALIZED, 10, "GE is already initialized. | |||
GE_ERRORNO_CLIENT(GE_CLI_GE_NOT_INITIALIZED, 11, "GE is not yet initialized or is finalized."); // 1343229963 | |||
// Init module error code definition | |||
GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048 | |||
GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049 | |||
GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050 | |||
GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported."); // 1343234051 | |||
GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profing initializations have not been done."); // 1343234052 | |||
GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048 | |||
GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049 | |||
GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050 | |||
GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported."); // 1343234051 | |||
GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profing initializations have not been done."); // 1343234052 | |||
GE_ERRORNO_INIT(GE_PROF_MODE_CONFLICT, 5, | |||
"Profiling command mode which is preferred is running, the api mode will not work."); // 1343234053 | |||
"Profiling command mode which is preferred is running, the api mode will not work."); // 1343234053 | |||
// Session module error code definition | |||
GE_ERRORNO_SESSION(GE_SESS_INIT_FAILED, 0, "Failed to initialize session."); // 1343238144 | |||
@@ -216,8 +216,8 @@ GE_ERRORNO_ENGINE(GE_ENG_FINALIZE_FAILED, 1, "Engine finalize failed."); | |||
GE_ERRORNO_ENGINE(GE_ENG_MEMTYPE_ERROR, 2, "Memory type HBM is necessary when engine is in device"); // 1343246338 | |||
// Optimize error code | |||
GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted."); // 1343242303 | |||
GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed."); // 1343242304 | |||
GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted."); // 1343242303 | |||
GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed."); // 1343242304 | |||
// Ops module error code definition | |||
GE_ERRORNO_OPS(GE_OPS_KERNEL_STORE_INIT_FAILED, 0, "Failed to initialize OpsKernelInfoStore."); // 1343250432 | |||
@@ -313,7 +313,7 @@ GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, 3, "Graph ma | |||
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed."); | |||
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed."); | |||
#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast<Status>(RT_ERROR) | |||
#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast<const Status>(RT_ERROR) | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ |
@@ -279,10 +279,19 @@ struct TaskDescInfo { | |||
}; | |||
struct OpDescInfo { | |||
std::string op_name; | |||
std::string op_type; | |||
uint32_t task_id; | |||
uint32_t stream_id; | |||
std::string op_name = ""; | |||
std::string op_type = ""; | |||
uint32_t task_id = 0; | |||
uint32_t stream_id = 0; | |||
uint32_t imply_type = 0; | |||
uint32_t block_dim = 0; | |||
std::string op_file_path = ""; | |||
std::string dev_func = ""; | |||
std::string tvm_magic = ""; | |||
uint32_t tiling_key = 0; | |||
std::string tiling_data = ""; | |||
std::string node_info = ""; | |||
std::vector<int64_t> workspace_bytes; | |||
std::vector<Format> input_format; | |||
std::vector<std::vector<int64_t>> input_shape; | |||
std::vector<DataType> input_data_type; | |||
@@ -95,7 +95,7 @@ class GE_FUNC_VISIBILITY OpUtils { | |||
/// @param [out] aipp_params aipp parameters | |||
/// @return enum of tagCCAippInputFormat | |||
/// | |||
static Status ConvertAippParams(const GeAttrValue::NamedAttrs &aipp_attr, domi::AippOpParams *aipp_params); | |||
static Status ConvertAippParams(const NamedAttrs &aipp_attr, domi::AippOpParams *aipp_params); | |||
static Status TransferDim(const std::vector<int64_t> &dim, std::vector<int64_t> &dim_vector); | |||
template <typename T> | |||
static void SliceData(const std::vector<char *> &input, int64_t chunk_size, std::vector<char *> &output, | |||
@@ -78,8 +78,8 @@ class GE_FUNC_VISIBILITY StringUtils { | |||
/// @param [in] delim separator | |||
/// @return string array after segmentation | |||
/// | |||
static std::vector<std::string> Split(const std::string &str, char delim) { | |||
std::vector<std::string> elems; | |||
static std::vector<std::string, std::allocator<std::string>> Split(const std::string &str, char delim) { | |||
std::vector<std::string, std::allocator<std::string>> elems; | |||
if (str.empty()) { | |||
elems.emplace_back(""); | |||
@@ -339,6 +339,8 @@ REGISTER_OPTYPE_DECLARE(PLACEHOLDER, "PlaceHolder"); | |||
REGISTER_OPTYPE_DECLARE(END, "End"); | |||
REGISTER_OPTYPE_DECLARE(BASICLSTMCELL, "BasicLSTMCell"); | |||
REGISTER_OPTYPE_DECLARE(GETNEXT, "GetNext"); | |||
REGISTER_OPTYPE_DECLARE(ITERATOR, "Iterator"); | |||
REGISTER_OPTYPE_DECLARE(ITERATORV2, "IteratorV2"); | |||
REGISTER_OPTYPE_DECLARE(INITDATA, "InitData"); | |||
REGISTER_OPTYPE_DECLARE(TRANSSHAPE, "TransShape") | |||
REGISTER_OPTYPE_DECLARE(REFIDENTITY, "RefIdentity"); | |||
@@ -18,8 +18,8 @@ | |||
#define INC_FRAMEWORK_COMMON_UTIL_H_ | |||
#include <google/protobuf/text_format.h> | |||
#include <limits.h> | |||
#include <math.h> | |||
#include <climits> | |||
#include <cmath> | |||
#include <sstream> | |||
#include <string> | |||
#include <vector> | |||
@@ -30,17 +30,17 @@ | |||
#include "framework/common/ge_inner_error_codes.h" | |||
#include "mmpa/mmpa_api.h" | |||
#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ | |||
do { \ | |||
if (size <= 0) { \ | |||
DOMI_LOGE("param[%s] is not a positive number", #size); \ | |||
return PARAM_INVALID; \ | |||
} \ | |||
} while (0) | |||
#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ | |||
do { \ | |||
if (size <= 0) { \ | |||
GELOGE(ge::FAILED, "param[%s] is not a positive number", #size); \ | |||
return PARAM_INVALID; \ | |||
} \ | |||
} while (false) | |||
#define CHECK_FALSE_EXEC(expr, exec_expr, ...) \ | |||
{ \ | |||
bool b = (expr); \ | |||
const bool b = (expr); \ | |||
if (!b) { \ | |||
exec_expr; \ | |||
} \ | |||
@@ -59,131 +59,133 @@ | |||
}); | |||
// For propagating errors when calling a function. | |||
#define GE_RETURN_IF_ERROR(expr) \ | |||
do { \ | |||
const ::ge::Status _status = (expr); \ | |||
if (_status) return _status; \ | |||
} while (0) | |||
#define GE_RETURN_IF_ERROR(expr) \ | |||
do { \ | |||
const ge::Status _chk_status = (expr); \ | |||
if (_chk_status != ge::SUCCESS) { \ | |||
return _chk_status; \ | |||
} \ | |||
} while (false) | |||
#define GE_RETURN_WITH_LOG_IF_ERROR(expr, ...) \ | |||
do { \ | |||
const ::ge::Status _status = (expr); \ | |||
if (_status) { \ | |||
DOMI_LOGE(__VA_ARGS__); \ | |||
return _status; \ | |||
const ge::Status _chk_status = (expr); \ | |||
if (_chk_status != ge::SUCCESS) { \ | |||
GELOGE(ge::FAILED, __VA_ARGS__); \ | |||
return _chk_status; \ | |||
} \ | |||
} while (0) | |||
} while (false) | |||
// check whether the parameter is true. If it is, return FAILED and record the error log | |||
#define GE_RETURN_WITH_LOG_IF_TRUE(condition, ...) \ | |||
do { \ | |||
if (condition) { \ | |||
DOMI_LOGE(__VA_ARGS__); \ | |||
GELOGE(ge::FAILED, __VA_ARGS__); \ | |||
return ge::FAILED; \ | |||
} \ | |||
} while (0) | |||
} while (false) | |||
// Check if the parameter is false. If yes, return FAILED and record the error log | |||
#define GE_RETURN_WITH_LOG_IF_FALSE(condition, ...) \ | |||
do { \ | |||
bool _condition = (condition); \ | |||
const bool _condition = (condition); \ | |||
if (!_condition) { \ | |||
DOMI_LOGE(__VA_ARGS__); \ | |||
GELOGE(ge::FAILED, __VA_ARGS__); \ | |||
return ge::FAILED; \ | |||
} \ | |||
} while (0) | |||
} while (false) | |||
// Checks whether the parameter is true. If so, returns PARAM_INVALID and records the error log | |||
#define GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(condition, ...) \ | |||
do { \ | |||
if (condition) { \ | |||
DOMI_LOGE(__VA_ARGS__); \ | |||
GELOGE(ge::FAILED, __VA_ARGS__); \ | |||
return ge::PARAM_INVALID; \ | |||
} \ | |||
} while (0) | |||
} while (false) | |||
// Check if the parameter is false. If yes, return PARAM_INVALID and record the error log | |||
#define GE_RT_PARAM_INVALID_WITH_LOG_IF_FALSE(condition, ...) \ | |||
do { \ | |||
bool _condition = (condition); \ | |||
const bool _condition = (condition); \ | |||
if (!_condition) { \ | |||
DOMI_LOGE(__VA_ARGS__); \ | |||
GELOGE(ge::FAILED, __VA_ARGS__); \ | |||
return ge::PARAM_INVALID; \ | |||
} \ | |||
} while (0) | |||
} while (false) | |||
// Check if the parameter is null. If yes, return PARAM_INVALID and record the error | |||
#define GE_CHECK_NOTNULL(val) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
REPORT_INNER_ERROR("E19999", "Param:%s is nullptr, check invalid", #val); \ | |||
DOMI_LOGE("[Check][Param:%s]null is invalid.", #val); \ | |||
GELOGE(ge::FAILED, "[Check][Param:%s]null is invalid.", #val); \ | |||
return ge::PARAM_INVALID; \ | |||
} \ | |||
} while (0) | |||
} while (false) | |||
// Check if the parameter is null. If yes, just return and record the error | |||
#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
DOMI_LOGE("param[%s] must not be null.", #val); \ | |||
return; \ | |||
} \ | |||
} while (0) | |||
#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ | |||
return; \ | |||
} \ | |||
} while (false) | |||
// Check whether the parameter is null. If so, execute the exec_expr expression and record the error log | |||
#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
DOMI_LOGE("param[%s] must not be null.", #val); \ | |||
exec_expr; \ | |||
} \ | |||
} while (0) | |||
#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ | |||
exec_expr; \ | |||
} \ | |||
} while (false) | |||
// Check whether the parameter is null. If yes, return directly and record the error log | |||
#define GE_RT_VOID_CHECK_NOTNULL(val) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
DOMI_LOGE("param[%s] must not be null.", #val); \ | |||
return; \ | |||
} \ | |||
} while (0) | |||
#define GE_RT_VOID_CHECK_NOTNULL(val) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ | |||
return; \ | |||
} \ | |||
} while (false) | |||
// Check if the parameter is null. If yes, return false and record the error log | |||
#define GE_RT_FALSE_CHECK_NOTNULL(val) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
DOMI_LOGE("param[%s] must not be null.", #val); \ | |||
return false; \ | |||
} \ | |||
} while (0) | |||
#define GE_RT_FALSE_CHECK_NOTNULL(val) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ | |||
return false; \ | |||
} \ | |||
} while (false) | |||
// Check if the parameter is out of bounds | |||
#define GE_CHECK_SIZE(size) \ | |||
do { \ | |||
if (size == 0) { \ | |||
DOMI_LOGE("param[%s] is out of range", #size); \ | |||
return ge::PARAM_INVALID; \ | |||
} \ | |||
} while (0) | |||
#define GE_CHECK_SIZE(size) \ | |||
do { \ | |||
if (size == 0) { \ | |||
GELOGE(ge::FAILED, "param[%s] is out of range", #size); \ | |||
return ge::PARAM_INVALID; \ | |||
} \ | |||
} while (false) | |||
// Check if the value on the left is greater than or equal to the value on the right | |||
#define GE_CHECK_GE(lhs, rhs) \ | |||
do { \ | |||
if (lhs < rhs) { \ | |||
DOMI_LOGE("param[%s] is less than[%s]", #lhs, #rhs); \ | |||
return ge::PARAM_INVALID; \ | |||
} \ | |||
} while (0) | |||
#define GE_CHECK_GE(lhs, rhs) \ | |||
do { \ | |||
if (lhs < rhs) { \ | |||
GELOGE(ge::FAILED, "param[%s] is less than[%s]", #lhs, #rhs); \ | |||
return ge::PARAM_INVALID; \ | |||
} \ | |||
} while (false) | |||
// Check if the value on the left is less than or equal to the value on the right | |||
#define GE_CHECK_LE(lhs, rhs) \ | |||
do { \ | |||
if (lhs > rhs) { \ | |||
DOMI_LOGE("param[%s] is greater than[%s]", #lhs, #rhs); \ | |||
return ge::PARAM_INVALID; \ | |||
} \ | |||
} while (0) | |||
#define GE_CHECK_LE(lhs, rhs) \ | |||
do { \ | |||
if (lhs > rhs) { \ | |||
GELOGE(ge::FAILED, "param[%s] is greater than[%s]", #lhs, #rhs); \ | |||
return ge::PARAM_INVALID; \ | |||
} \ | |||
} while (false) | |||
#define GE_DELETE_NEW_SINGLE(var) \ | |||
do { \ | |||
@@ -191,7 +193,7 @@ | |||
delete var; \ | |||
var = nullptr; \ | |||
} \ | |||
} while (0) | |||
} while (false) | |||
#define GE_DELETE_NEW_ARRAY(var) \ | |||
do { \ | |||
@@ -199,18 +201,18 @@ | |||
delete[] var; \ | |||
var = nullptr; \ | |||
} \ | |||
} while (0) | |||
} while (false) | |||
#define GE_FREE_RT_LOG(addr) \ | |||
do { \ | |||
if (addr != nullptr) { \ | |||
rtError_t error = rtFree(addr); \ | |||
const rtError_t error = rtFree(addr); \ | |||
if (error != RT_ERROR_NONE) { \ | |||
GELOGE(RT_FAILED, "Call rtFree failed, error: %#x", error); \ | |||
} \ | |||
addr = nullptr; \ | |||
} \ | |||
} while (0) | |||
} while (false) | |||
/** | |||
* @ingroup domi_common | |||
@@ -228,12 +230,6 @@ using google::protobuf::Message; | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Maximum file path length | |||
/// | |||
const int32_t DOMI_MAX_PATH_LEN = 256; | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Reads the proto structure from an array. | |||
/// @param [in] data proto data to be read | |||
/// @param [in] size proto data size | |||
@@ -253,8 +249,6 @@ GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *data, int size, Message * | |||
/// | |||
GE_FUNC_VISIBILITY bool ReadProtoFromText(const char *file, google::protobuf::Message *message); | |||
GE_FUNC_VISIBILITY bool ReadProtoFromMem(const char *data, int size, google::protobuf::Message *message); | |||
/// | |||
/// @ingroup: domi_common | |||
/// @brief: get length of file | |||
@@ -306,10 +300,10 @@ GE_FUNC_VISIBILITY std::string ToString(std::vector<T> &v) { | |||
ss << x; | |||
ss << ", "; | |||
} | |||
std::string strRet = | |||
ss.str().substr(0, ss.str().length() - 2); // Delete the two extra characters at the end of the line. | |||
strRet += "]"; | |||
return strRet; | |||
// Delete the two extra characters at the end of the line. | |||
std::string str = ss.str().substr(0u, ss.str().length() - 2u); | |||
str += "]"; | |||
return str; | |||
} | |||
/// | |||
@@ -326,10 +320,10 @@ GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedField<T> | |||
ss << x; | |||
ss << ", "; | |||
} | |||
std::string strRet = | |||
ss.str().substr(0, ss.str().length() - 2); // Delete the two extra characters at the end of the line. | |||
strRet += "]"; | |||
return strRet; | |||
// Delete the two extra characters at the end of the line. | |||
std::string str = ss.str().substr(0u, ss.str().length() - 2u); | |||
str += "]"; | |||
return str; | |||
} | |||
/// | |||
@@ -394,14 +388,6 @@ GE_FUNC_VISIBILITY bool ValidateStr(const std::string &filePath, const std::stri | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Check whether the file is normal file. | |||
/// @param [in] file_path file path | |||
/// @param [out] result | |||
/// | |||
GE_FUNC_VISIBILITY bool IsValidFile(const char *file_path); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Check path invalid | |||
/// @param [in] path, path to be checked | |||
/// @param [in] length, length of path | |||
@@ -43,14 +43,31 @@ struct DNNEngineAttribute { | |||
// If the engine requires a specific input format, set this attribute; otherwise set FORMAT_RESERVED | |||
Format engine_input_format; | |||
Format engine_output_format; | |||
bool atomic_engine_flag; | |||
}; | |||
class GE_FUNC_VISIBILITY DNNEngine { | |||
public: | |||
DNNEngine() = default; | |||
explicit DNNEngine(const DNNEngineAttribute &attrs) { | |||
engine_attribute_ = attrs; | |||
} | |||
virtual ~DNNEngine() = default; | |||
virtual Status Initialize(const std::map<std::string, std::string> &options) = 0; | |||
virtual Status Finalize() = 0; | |||
virtual void GetAttributes(DNNEngineAttribute &attr) const = 0; | |||
Status Initialize(const std::map<std::string, std::string> &options) { | |||
return SUCCESS; | |||
} | |||
Status Finalize() { | |||
return SUCCESS; | |||
} | |||
void GetAttributes(DNNEngineAttribute &attr) const { | |||
attr = engine_attribute_; | |||
} | |||
bool IsAtomic() const { | |||
return engine_attribute_.atomic_engine_flag; | |||
} | |||
protected: | |||
DNNEngineAttribute engine_attribute_; | |||
}; | |||
} // namespace ge | |||
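// A minimal usage sketch of the reworked, non-virtual interface above; the attribute value
// and the function name are illustrative assumptions, not defaults.
inline bool EngineIsAtomicSketch() {
  ge::DNNEngineAttribute attr;
  attr.atomic_engine_flag = true;   // illustrative value
  ge::DNNEngine engine(attr);
  (void)engine.Initialize({});      // now a no-op returning SUCCESS
  ge::DNNEngineAttribute copy;
  engine.GetAttributes(copy);       // copies engine_attribute_ out
  return engine.IsAtomic();
}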
@@ -34,13 +34,16 @@ namespace ge { | |||
class GeRootModel; | |||
class GE_FUNC_VISIBILITY GeGenerator { | |||
public: | |||
using InOutTensorRef = std::pair<const vector<ge::GeTensor> &, const vector<ge::GeTensor> &>; | |||
static GeGenerator &GetInstance() { | |||
static GeGenerator Instance; | |||
return Instance; | |||
} | |||
GeGenerator() = default; | |||
~GeGenerator() { (void)Finalize(); } | |||
~GeGenerator() { | |||
(void)Finalize(); | |||
} | |||
GeGenerator(const GeGenerator &) = delete; | |||
@@ -94,8 +97,8 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||
/// @param [in] graph_name: graph name. | |||
/// @param [out] graph: graph of single op. | |||
/// @return SUCCESS or FAILED | |||
Status BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs, | |||
std::string graph_name, Graph &graph); | |||
Status BuildSingleOpGraph(OpDescPtr &op_desc, const InOutTensorRef &inputs_outputs, std::string graph_name, | |||
Graph &graph, std::vector<std::pair<std::string, std::string>> &inputs_name_type); | |||
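  // Call sketch for the new signature (illustrative; op_desc and the other names are hypothetical):
  //   std::vector<ge::GeTensor> inputs, outputs;
  //   GeGenerator::InOutTensorRef io(inputs, outputs);
  //   std::vector<std::pair<std::string, std::string>> inputs_name_type;
  //   Graph graph("single_op");
  //   GeGenerator::GetInstance().BuildSingleOpGraph(op_desc, io, "single_op", graph, inputs_name_type);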
private: | |||
Status GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs, | |||
@@ -110,6 +113,10 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||
using GeRootModelPtr = std::shared_ptr<ge::GeRootModel>; | |||
Status SetModelNameForDump(const GeRootModelPtr &ge_root_model); | |||
Status CreateGeneralizedBuildAttrs(const GeRootModelPtr &ge_root_model, const std::vector<GeTensor> &inputs, | |||
const std::vector<GeTensor> &outputs, | |||
const std::vector<std::pair<std::string, std::string>> &inputs_name_type, | |||
std::vector<ge::NamedAttrs> &generalized_build_attrs); | |||
class Impl; | |||
@@ -128,6 +128,7 @@ struct OmgContext { | |||
bool fuzz_compile_flag = false; | |||
std::string atc_cmdline; | |||
bool user_attr_index_valid = false; | |||
bool is_online_model = false; | |||
}; | |||
} // namespace ge | |||
@@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY PlatformVersionManager { | |||
~PlatformVersionManager() = delete; | |||
static Status GetPlatformVersion(std::string &ver) { | |||
ver = "1.11.z"; | |||
std::vector<std::string> version_splits = StringUtils::Split(ver, '.'); | |||
const std::vector<std::string> version_splits = StringUtils::Split(ver, '.'); | |||
GE_IF_BOOL_EXEC(version_splits.size() < 3, GELOGW("Read platform version error!"); return FAILED;); | |||
GELOGI("Read current platform version: %s.", ver.c_str()); | |||
@@ -1 +1 @@ | |||
Subproject commit 60df4b39a6f639c21dd7deb220b93345451938f5 | |||
Subproject commit f011a4c7ad36e1ec80990e659abefc78b0aa7543 |
@@ -1,109 +1,107 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||
#define __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||
#include <stddef.h> | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
static const int32_t ACL_RT_SUCCESS = 0; // success | |||
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout | |||
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow | |||
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource | |||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception | |||
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal | |||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||
/** | |||
* @file rt_error_codes.h | |||
* | |||
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. | |||
* | |||
* This program is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
*/ | |||
#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||
#define __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||
#include <stddef.h> | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
static const int32_t ACL_RT_SUCCESS = 0; // success | |||
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout | |||
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow | |||
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource | |||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception | |||
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal | |||
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode | |||
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die | |||
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id | |||
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set | |||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ |
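// A minimal sketch (not part of the header) of how a caller might coarsely group these codes
// for reporting. The range-based grouping simply mirrors the constants above and is an
// assumption, not an official ACL API.
static inline const char *AclRtErrorCategory(const int32_t code) {
  if (code == ACL_RT_SUCCESS) { return "success"; }
  if ((code >= 107000) && (code < 207000)) { return "invalid parameter"; }
  if ((code >= 207000) && (code < 507000)) { return "feature or resource limitation"; }
  return "runtime internal error";
}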
@@ -1258,21 +1258,21 @@ REG_OP(ExpandD) | |||
* Three inputs, including: | |||
* @li bucket_list: A 1-D tensor of type int32 containing the indices into ivf_counts and ivf_offset. \n | |||
* @li ivf_counts: A 1-D tensor of type int32 with the value of ivf counts. \n | |||
* @li ivf_offset: A 1-D tensor of type int32 with the value of ivf offset. \n | |||
* @li ivf_offset: A 1-D tensor of type int32 or int64 with the value of ivf offset. \n | |||
* @par Attributes: | |||
* total_limit: An int64 specifying the maximum value of the sum of ivf_counts corresponding to bucket_list. \n | |||
* @par Outputs: | |||
* @li buckets_limit: A 1-D tensor of type int32 with the sum <= total_limit. \n | |||
* @li buckets_offset: A 1-D tensor of type int32 with the value of ivf_offset corresponding to bucket_list. \n | |||
* @li buckets_offset: A 1-D tensor of type int32 or int64 with the value of ivf_offset corresponding to bucket_list. \n | |||
*/ | |||
REG_OP(CalcBucketsLimitAndOffset) | |||
.INPUT(bucket_list, TensorType({DT_INT32})) | |||
.INPUT(ivf_counts, TensorType({DT_INT32})) | |||
.INPUT(ivf_offset, TensorType({DT_INT32})) | |||
.INPUT(ivf_offset, TensorType({DT_INT32, DT_INT64})) | |||
.OUTPUT(buckets_limit, TensorType({DT_INT32})) | |||
.OUTPUT(buckets_offset, TensorType({DT_INT32})) | |||
.OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64})) | |||
.REQUIRED_ATTR(total_limit, Int) | |||
.OP_END_FACTORY_REG(CalcBucketsLimitAndOffset) | |||
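// Rough host-side reference for the semantics described above; the names are illustrative. The
// truncation policy (stop taking counts once the running sum would exceed total_limit) is an
// assumption drawn from the "sum <= total_limit" note, not a statement of the kernel's exact behaviour.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>
inline void CalcBucketsLimitAndOffsetRef(const std::vector<int32_t> &bucket_list,
                                         const std::vector<int32_t> &ivf_counts,
                                         const std::vector<int64_t> &ivf_offset,
                                         const int64_t total_limit,
                                         std::vector<int32_t> &buckets_limit,
                                         std::vector<int64_t> &buckets_offset) {
  int64_t sum = 0;
  for (const int32_t b : bucket_list) {
    const int64_t count = ivf_counts[static_cast<size_t>(b)];
    const int64_t take = std::min(count, std::max<int64_t>(total_limit - sum, 0));
    sum += take;
    buckets_limit.push_back(static_cast<int32_t>(take));
    buckets_offset.push_back(ivf_offset[static_cast<size_t>(b)]);
  }
}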
@@ -0,0 +1,58 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/*! | |||
* \file cluster.h | |||
* \brief | |||
*/ | |||
#ifndef OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_ | |||
#define OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_ | |||
#include "graph/operator_reg.h" | |||
#include "graph/operator.h" | |||
namespace ge { | |||
/** | |||
* @brief Perform k-means clustering on a data matrix. \n | |||
* @par Inputs: | |||
* Three required inputs and one optional input, including: \n | |||
* @li x: A 2D tensor of data type float32. \n | |||
* @li y: A 2D tensor of data type float32. \n | |||
* @li sum_square_x: An optional 2D tensor of data type float32. \n | |||
* @li sum_square_y: A 2D tensor of data type float32. \n | |||
* @par Attributes: | |||
* use_actual_distance: Indicates whether to calculate the complete distance. \n | |||
* @par Outputs: | |||
* @li segment_sum: A tensor of data type float32. \n | |||
* @li segment_count: A tensor of data type float32. \n | |||
* @li kmean_total_sum: A tensor of data type float32. \n | |||
*/ | |||
REG_OP(KMeansCentroids) | |||
.INPUT(x, TensorType({DT_FLOAT})) | |||
.INPUT(y, TensorType({DT_FLOAT})) | |||
.INPUT(sum_square_y, TensorType({DT_FLOAT})) | |||
.OPTIONAL_INPUT(sum_square_x, TensorType({DT_FLOAT})) | |||
.OUTPUT(segment_sum, TensorType({DT_FLOAT})) | |||
.OUTPUT(segment_count, TensorType({DT_FLOAT})) | |||
.OUTPUT(kmean_total_sum, TensorType({DT_FLOAT})) | |||
.ATTR(use_actual_distance, Bool, false) | |||
.OP_END_FACTORY_REG(KMeansCentroids) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_ |
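// Background sketch (not the kernel itself): k-means assignment usually expands the squared
// distance as ||x - y||^2 = ||x||^2 - 2*x.y + ||y||^2, which is presumably why the op takes
// the precomputed sum_square_y (and optional sum_square_x) inputs; use_actual_distance would
// then control whether the per-sample ||x||^2 term is added, which does not change the argmin.
// Illustrative host-side helper:
#include <cstddef>
#include <limits>
#include <vector>
inline size_t NearestCentroid(const std::vector<float> &x,
                              const std::vector<std::vector<float>> &y,
                              const std::vector<float> &sum_square_y) {
  size_t best = 0;
  float best_dist = std::numeric_limits<float>::max();
  for (size_t j = 0; j < y.size(); ++j) {
    float dot = 0.0f;
    for (size_t d = 0; d < x.size(); ++d) { dot += x[d] * y[j][d]; }
    const float dist = sum_square_y[j] - 2.0f * dot;  // ||x||^2 omitted: constant per sample
    if (dist < best_dist) { best_dist = dist; best = j; }
  }
  return best;
}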
@@ -2336,12 +2336,14 @@ REG_OP(CacheAllIndexToLocal) | |||
/** | |||
*@brief LRUCacheV2, aicore LRUCache. | |||
*@par Inputs: | |||
*index_list: exchange index list | |||
*data: host data | |||
*cache: gm cache | |||
*tag: cache's tag | |||
*is_last_call: if this is the last call, write all cached data back to data | |||
*@par Outputs: | |||
*data: output data | |||
*cache: gm cache | |||
@@ -2349,8 +2351,11 @@ REG_OP(CacheAllIndexToLocal) | |||
*index_offset_list: index_offset_list | |||
*not_in_cache_index_list: output not in cache's index_list | |||
*not_in_cache_number: scalar | |||
*@par Attributes: | |||
*pre_route_count: types of all outputs | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(LRUCacheV2) | |||
@@ -3391,57 +3391,57 @@ REG_OP(TensorRedirect) | |||
.OP_END_FACTORY_REG(TensorRedirect) | |||
/** | |||
* @brief Performs the element-wise division of tensor x2 by tensor x3, | |||
* multiply the result by the scalar value and add it to tensor x1 | |||
* @brief Performs the element-wise division of tensor x1 by tensor x2, | |||
* multiplies the result by the scalar value and adds it to tensor input_data. | |||
* @par Inputs: | |||
* Four inputs, including: | |||
* @li input_data: A mutable input Tensor. Must be one of the following types: | |||
* float16, float32. | |||
* @li x1: A mutable input Tensor of the same type as x1. | |||
* @li x2: A mutable input Tensor of the same type as x1. | |||
* float16, float32, double, int64. | |||
* @li x1: A mutable input Tensor of the same type as input_data. | |||
* @li x2: A mutable input Tensor of the same type as input_data. | |||
* @li value: A mutable input Tensor. Must be one of the following types: | |||
* float16, float32, int32. \n | |||
* float16, float32, double, int64, int32. \n | |||
* @par Outputs: | |||
* y: A mutable Tensor. Has the same type as "x1". \n | |||
* y: A mutable Tensor. Has the same type as input_data. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Addcdiv. | |||
* Compatible with the Pytorch operator Addcdiv(version-1.5.0). | |||
*/ | |||
REG_OP(Addcdiv) | |||
.INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64})) | |||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64})) | |||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64})) | |||
.INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32, DT_DOUBLE, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64})) | |||
.OP_END_FACTORY_REG(Addcdiv) | |||
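// Element-wise reference for the semantics above (PyTorch addcdiv):
//   y[i] = input_data[i] + value * (x1[i] / x2[i])
// Minimal float sketch, assuming identical shapes and treating the one-element "value" input
// as a scalar; the helper name is illustrative.
#include <cstddef>
#include <vector>
inline std::vector<float> AddcdivRef(const std::vector<float> &input_data, const std::vector<float> &x1,
                                     const std::vector<float> &x2, const float value) {
  std::vector<float> y(input_data.size());
  for (size_t i = 0; i < y.size(); ++i) { y[i] = input_data[i] + value * (x1[i] / x2[i]); }
  return y;
}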
/** | |||
* @brief Performs the element-wise multiplication of tensor x2 by tensor x3, | |||
* @brief Performs the element-wise multiplication of tensor x1 by tensor x2, | |||
* multiplies the result by the scalar value and adds it to tensor input_data. | |||
* @par Inputs: | |||
* Four inputs, including: | |||
* @li input_data: A mutable input Tensor. Must be one of the following types: | |||
* float16, float32, int8, int32, uint8. | |||
* @li x1: A mutable input Tensor of the same type as x1. | |||
* @li x2: A mutable input Tensor of the same type as x1. | |||
* @li value: A tensor which includes only one element of the same type as x1. \n | |||
* float16, float32, double, int64, int8, int32, uint8. | |||
* @li x1: A mutable input Tensor of the same type as input_data. | |||
* @li x2: A mutable input Tensor of the same type as input_data. | |||
* @li value: A tensor which includes only one element of the same type as input_data. \n | |||
* @par Outputs: | |||
* y: A mutable output Tensor. Has the same type as "x1". \n | |||
* y: A mutable output Tensor. Has the same type as input_data. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Addcmul. | |||
*/ | |||
REG_OP(Addcmul) | |||
.INPUT(input_data, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||
.INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||
.INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||
.INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||
.OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 })) | |||
.INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) | |||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) | |||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) | |||
.INPUT(value, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64})) | |||
.OP_END_FACTORY_REG(Addcmul) | |||
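// Element-wise reference for Addcmul (PyTorch semantics): y[i] = input_data[i] + value * x1[i] * x2[i].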
/** | |||
@@ -3508,8 +3508,8 @@ REG_OP(StrideAdd) | |||
* Compatible with the Pytorch equal operator. \n | |||
*/ | |||
REG_OP(TensorEqual) | |||
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
.INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_INT8, DT_UINT8})) | |||
.INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_INT8, DT_UINT8})) | |||
.OUTPUT(output_z, TensorType({DT_BOOL})) | |||
.OP_END_FACTORY_REG(TensorEqual) | |||
@@ -410,10 +410,10 @@ form square matrices. \n | |||
*/ | |||
REG_OP(Svd) | |||
.INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT })) | |||
.OUTPUT(sigma, TensorType({ DT_DOUBLE, DT_FLOAT })) | |||
.OUTPUT(u, TensorType({ DT_DOUBLE, DT_FLOAT })) | |||
.OUTPUT(v, TensorType({ DT_DOUBLE, DT_FLOAT })) | |||
.INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) | |||
.OUTPUT(sigma, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) | |||
.OUTPUT(u, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) | |||
.OUTPUT(v, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 })) | |||
.ATTR(compute_uv, Bool, true) | |||
.ATTR(full_matrices, Bool, false) | |||
.OP_END_FACTORY_REG(Svd) | |||
@@ -480,7 +480,7 @@ REG_OP(HistogramFixedWidth) | |||
.INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) | |||
.INPUT(nbins, TensorType({DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_INT32})) | |||
.ATTR(dtype, String, "int32") | |||
.ATTR(dtype, Int, 3) | |||
.OP_END_FACTORY_REG(HistogramFixedWidth) | |||
/** | |||
@@ -511,7 +511,7 @@ REG_OP(HistogramFixedWidthD) | |||
.INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_INT32})) | |||
.REQUIRED_ATTR(nbins, Int) | |||
.ATTR(dtype, String, "int32") | |||
.ATTR(dtype, Int, 3) | |||
.OP_END_FACTORY_REG(HistogramFixedWidthD) | |||
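// Note: the dtype attribute is now the numeric ge::DataType value rather than a string; the
// default of 3 is understood to correspond to DT_INT32, matching the former "int32" default.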
/** | |||
@@ -939,6 +939,57 @@ REG_OP(LpNorm) | |||
.OP_END_FACTORY_REG(LpNorm) | |||
/** | |||
* @brief Computes LpNormReduce. | |||
* @par Inputs: | |||
* x: An ND tensor of type float16, float32. \n | |||
* | |||
* @par Attributes: | |||
* @li p: Int, "inf" or "-inf", default value is 2. | |||
* @li axes: ListInt, {} means all axes will be computed. | |||
* @li keepdim: Bool, default is false. | |||
* @li epsilon: Float, default is 1e-12. \n | |||
* @par Outputs: | |||
* y: An ND tensor of type float16, float32. The shape of y depends | |||
* on axes and keepdim. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator LpNormReduce. | |||
*/ | |||
REG_OP(LpNormReduce) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(p, Int, 2) | |||
.ATTR(axes, ListInt, {}) | |||
.ATTR(keepdim, Bool, false) | |||
.ATTR(epsilon, Float, 1e-12) | |||
.OP_END_FACTORY_REG(LpNormReduce) | |||
/** | |||
* @brief Computes LpNormUpdate. | |||
* @par Inputs: | |||
* x: An ND tensor of type float16, float32. \n | |||
* | |||
* @par Attributes: | |||
* @li p: Int, "inf" or "-inf", default value is 2. | |||
* @li epsilon: Float, default is 1e-12. \n | |||
* @par Outputs: | |||
* y: An ND tensor of type float16, float32. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator LpNormUpdate. | |||
*/ | |||
REG_OP(LpNormUpdate) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(p, Int, 2) | |||
.ATTR(epsilon, Float, 1e-12) | |||
.OP_END_FACTORY_REG(LpNormUpdate) | |||
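// Reference for the norm the two ops above jointly compute (inferred from the names):
//   ||x||_p = (sum_i |x_i|^p)^(1/p), with epsilon guarding the degenerate all-zero case.
#include <cmath>
#include <vector>
inline float LpNormRef(const std::vector<float> &x, const int p) {
  double acc = 0.0;
  for (const float v : x) { acc += std::pow(std::fabs(v), static_cast<double>(p)); }
  return static_cast<float>(std::pow(acc, 1.0 / static_cast<double>(p)));
}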
/** | |||
* @brief get complex. | |||
* @par Inputs: | |||
@@ -49,10 +49,10 @@ namespace ge { | |||
* Compatible with the TensorFlow operator BatchMatmul. | |||
*/ | |||
REG_OP(MatMul) | |||
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) | |||
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) | |||
.ATTR(transpose_x1, Bool, false) | |||
.ATTR(transpose_x2, Bool, false) | |||
.OP_END_FACTORY_REG(MatMul) | |||
@@ -88,10 +88,10 @@ REG_OP(MatMul) | |||
* Compatible with the TensorFlow operator BatchMatmul. | |||
*/ | |||
REG_OP(MatMulV2) | |||
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4})) | |||
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8 DT_INT4})) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) | |||
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) | |||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) | |||
.ATTR(transpose_x1, Bool, false) | |||
.ATTR(transpose_x2, Bool, false) | |||
@@ -532,6 +532,36 @@ REG_OP(ScatterAdd) | |||
.OP_END_FACTORY_REG(ScatterAdd) | |||
/** | |||
*@brief Adds sparse "updates" to a variable reference . \n | |||
*@par Inputs: | |||
* Three inputs, including: | |||
*@li var: An ND Tensor . | |||
*Must be one of the following types: float16, float32, int32, int8, uint8 | |||
*@li indices: An ND Tensor of type int32 or int64 | |||
*@li updates: An ND Tensor . | |||
*Must be one of the following types: float16, float32, int32, int8, uint8 | |||
*@par Attributes: | |||
* axis: A required int. The axis along which to index. \n | |||
*@par Outputs: | |||
*var: A Tensor. Has the same type and format as input "var" . \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the pytorch operator ScatterAdd. | |||
*/ | |||
REG_OP(ScatterAddWithAxis) | |||
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||
.INPUT(indices, TensorType::IndexNumberType()) | |||
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) | |||
.REQUIRED_ATTR(axis, Int) | |||
.OP_END_FACTORY_REG(ScatterAddWithAxis) | |||
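// Reference for the simplest 1-D case (axis = 0), matching the PyTorch scatter_add semantics
// cited above: var[indices[i]] += updates[i]. The helper name is illustrative.
#include <cstddef>
#include <cstdint>
#include <vector>
inline void ScatterAddAxis0Ref(std::vector<float> &var, const std::vector<int32_t> &indices,
                               const std::vector<float> &updates) {
  for (size_t i = 0; i < indices.size(); ++i) {
    var[static_cast<size_t>(indices[i])] += updates[i];
  }
}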
/** | |||
*@brief Divides a variable reference by sparse updates . \n | |||
*@par Inputs: | |||
@@ -1067,6 +1097,40 @@ REG_OP(MatrixSetDiagV2) | |||
.OP_END_FACTORY_REG(MatrixSetDiagV2) | |||
/** | |||
*@brief Returns a batched matrix tensor with new batched diagonal values . \n | |||
*@par Inputs: | |||
* Three inputs, including: | |||
*@li input: Rank `r+1`, where `r >= 1`. \n | |||
*@li diagonal: Rank `r` when `k` is an integer or `k[0] == k[1]`. Otherwise, it has rank `r+1`. \n | |||
*@li k: | |||
*Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n | |||
*diagonal, and negative value means subdiagonals. `k` can be a single integer \n | |||
*(for a single diagonal) or a pair of integers specifying the low and high ends \n | |||
*of a matrix band. `k[0]` must not be larger than `k[1]`. \n | |||
*@par Attributes: | |||
*@li align: An optional string. Defaults to RIGHT_LEFT. It is a string specifying \n | |||
*how superdiagonals and subdiagonals should be aligned, respectively. \n | |||
*other optional: LEFT_RIGHT, LEFT_LEFT, and RIGHT_RIGHT.\n | |||
*@par Outputs: | |||
*output: Rank `r+1`, with `output.shape = input.shape` . \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator MatrixSetDiagV3. | |||
*/ | |||
REG_OP(MatrixSetDiagV3) | |||
.INPUT(input, TensorType::BasicType()) | |||
.INPUT(diagonal, TensorType::BasicType()) | |||
.INPUT(k, TensorType({DT_INT32})) | |||
.OUTPUT(output, TensorType::BasicType()) | |||
.ATTR(align, String, "RIGHT_LEFT") | |||
.OP_END_FACTORY_REG(MatrixSetDiagV3) | |||
/** | |||
*@brief Returns a batched diagonal tensor with given batched diagonal values . \n | |||
*@par Inputs: | |||
@@ -142,6 +142,29 @@ REG_OP(BatchNorm) | |||
.OP_END_FACTORY_REG(BatchNorm) | |||
/** | |||
*@brief part of SyncBatchNormBackward . \n | |||
*@par Inputs: | |||
* Four inputs, including: | |||
*@li sum_dy: A Tensor. Must be one of the following types: float16, float32 . | |||
*@li sum_dy_dx_pad: A Tensor. Must be one of the following types: float16, float32 . | |||
*@li mean: A Tensor. Must be one of the following types: float16, float32 . | |||
*@li invert_std: A Tensor. Must be one of the following types: float16, float32 . \n | |||
*@par Outputs: | |||
*@li sum_dy_xmu: A Tensor. Has the same type and format as input "sum_dy" | |||
*@li y: A Tensor. Has the same type and format as input "sum_dy" . \n | |||
*/ | |||
REG_OP(SyncBatchNormBackwardReduce) | |||
.INPUT(sum_dy, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(sum_dy_dx_pad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(invert_std, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(sum_dy_xmu, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(SyncBatchNormBackwardReduce) | |||
/** | |||
*@brief Performs batch normalization . \n | |||
*@par Inputs: | |||
@@ -369,16 +369,14 @@ REG_OP(BiasAddGrad) | |||
*\n | |||
* The following are the supported data types and data formats:\n | |||
*\n | |||
| Tensor | out_bckprop | filter | y\n | |||
------------|-------------|---------|--------\n | |||
| Data Type | float16 | float16 | float16\n | |||
| |-------------|---------|--------\n | |||
| | float32 | float32 | float32\n | |||
| |-------------|---------|--------\n | |||
| | float64 | float64 | float64\n | |||
------------|-------------|---------|--------\n | |||
| Format | NCHW | NCHW | NCHW\n | |||
| | NHWC | HWCN | NHWC\n | |||
*\n | |||
| Tensor | out_bckprop | filter | y |\n | |||
|-----------|-------------|---------|--------|\n | |||
| Data Type | float16 | float16 | float16|\n | |||
| | float32 | float32 | float32|\n | |||
| | float64 | float64 | float64|\n | |||
| Format | NCHW | NCHW | NCHW |\n | |||
| | NHWC | HWCN | NHWC |\n | |||
*\n | |||
* For float32 and float64 type, the actual calculation on the chip is based on | |||
* float16. | |||
@@ -400,30 +398,25 @@ REG_OP(BiasAddGrad) | |||
*\n | |||
* The following value range restrictions must be met:\n | |||
*\n | |||
| Name | Field | Scope\n | |||
-------------------|----------|--------------\n | |||
| input_size | H | [1, 200000]\n | |||
| | W | [1, 4096]\n | |||
-------------------|----------|--------------\n | |||
| Filter | H | [1, 255]\n | |||
| | W | [1, 255]\n | |||
-------------------|----------|--------------\n | |||
| out_backprop | H*strideH| [1, 200000]\n | |||
| | W*strideW| [1, 4096]\n | |||
-------------------|----------|--------------\n | |||
| y(fmap) | H | [1, 200000]\n | |||
| | W | [1, 4096]\n | |||
-------------------|----------|--------------\n | |||
| Stride | H | [1, 63]\n | |||
| | W | [1, 63]\n | |||
-------------------|----------|--------------\n | |||
| Padding | Top | [0, 255]\n | |||
| | Bottom | [0, 255]\n | |||
| | Left | [0, 255]\n | |||
| | Right | [0, 255]\n | |||
-------------------|----------|--------------\n | |||
| Dilation | H | [1, 255]\n | |||
| | W | [1, 255]\n | |||
*\n | |||
| Name | Field | Scope |\n | |||
|------------------|----------|--------------|\n | |||
| input_size | H | [1, 200000] |\n | |||
| | W | [1, 4096] |\n | |||
| Filter | H | [1, 255] |\n | |||
| | W | [1, 255] |\n | |||
| out_backprop | H*strideH| [1, 200000] |\n | |||
| | W*strideW| [1, 4096] |\n | |||
| y(fmap) | H | [1, 200000] |\n | |||
| | W | [1, 4096] |\n | |||
| Stride | H | [1, 63] |\n | |||
| | W | [1, 63] |\n | |||
| Padding | Top | [0, 255] |\n | |||
| | Bottom | [0, 255] |\n | |||
| | Left | [0, 255] |\n | |||
| | Right | [0, 255] |\n | |||
| Dilation | H | [1, 255] |\n | |||
| | W | [1, 255] |\n | |||
*\n | |||
* In Ascend910, fmap or out_backprop's H and W do not support 1 when | |||
@@ -495,9 +488,9 @@ REG_OP(Conv2DBackpropInput) | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv2DBackpropInput instead. | |||
*/ | |||
REG_OP(Conv2DBackpropInputD) | |||
.INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) | |||
.INPUT(out_backprop, TensorType({DT_FLOAT16, DT_INT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32})) | |||
.INPUT(filter, TensorType({DT_FLOAT16, DT_INT8, DT_BF16})) | |||
.INPUT(out_backprop, TensorType({DT_FLOAT16, DT_INT8, DT_BF16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32, DT_BF16})) | |||
.REQUIRED_ATTR(input_size, ListInt) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(pads, ListInt) | |||
@@ -523,13 +516,12 @@ REG_OP(Conv2DBackpropInputD) | |||
*\n | |||
* The following are the supported data types and data formats:\n | |||
*\n | |||
| Tensor | x | filter | bias | y\n | |||
------------|---------|---------|---------|--------\n | |||
| Data Type | float16 | float16 | float16 | float16\n | |||
| |---------|---------|---------|--------\n | |||
| | int8 | int8 | int32 | int32\n | |||
------------|---------|---------|---------|--------\n | |||
| Format | NCHW | NCHW | ND | NCHW\n | |||
*\n | |||
| Tensor | x | filter | bias | y |\n | |||
|-----------|---------|---------|---------|--------|\n | |||
| Data Type | float16 | float16 | float16 | float16|\n | |||
| | int8 | int8 | int32 | int32 |\n | |||
| Format | NCHW | NCHW | ND | NCHW |\n | |||
*\n | |||
* For int8, a dequant or requant operator must be followed. | |||
*\n | |||
@@ -553,29 +545,24 @@ REG_OP(Conv2DBackpropInputD) | |||
*\n | |||
* The following value range restrictions must be met:\n | |||
*\n | |||
| Name | Field | Scope\n | |||
-------------------|----------|--------------\n | |||
| x (out_backprop) | H*strideH| [1, 200000]\n | |||
| | W*strideW| [1, 4096]\n | |||
-------------------|----------|--------------\n | |||
| Filter | H | [1, 255]\n | |||
| | W | [1, 255]\n | |||
-------------------|----------|--------------\n | |||
| y (fmap) | H | [1, 200000]\n | |||
| | W | [1, 4096]\n | |||
-------------------|----------|--------------\n | |||
| Stride | H | [1, 63]\n | |||
| | W | [1, 63]\n | |||
-------------------|----------|--------------\n | |||
| Padding | Top | [0, 255]\n | |||
| | Bottom | [0, 255]\n | |||
| | Left | [0, 255]\n | |||
| | Right | [0, 255]\n | |||
-------------------|----------|--------------\n | |||
| Dilation | H | [1, 255]\n | |||
| | W | [1, 255]\n | |||
-------------------|----------|--------------\n | |||
| Offset_x | | [-128, 127]\n | |||
*\n | |||
| Name | Field | Scope |\n | |||
|------------------|----------|--------------|\n | |||
| x (out_backprop) | H*strideH| [1, 200000] |\n | |||
| | W*strideW| [1, 4096] |\n | |||
| Filter | H | [1, 255] |\n | |||
| | W | [1, 255] |\n | |||
| y (fmap) | H | [1, 200000] |\n | |||
| | W | [1, 4096] |\n | |||
| Stride | H | [1, 63] |\n | |||
| | W | [1, 63] |\n | |||
| Padding | Top | [0, 255] |\n | |||
| | Bottom | [0, 255] |\n | |||
| | Left | [0, 255] |\n | |||
| | Right | [0, 255] |\n | |||
| Dilation | H | [1, 255] |\n | |||
| | W | [1, 255] |\n | |||
| Offset_x | | [-128, 127] |\n | |||
*\n | |||
* In Ascend910, fmap or out_backprop's H and W do not support 1 when | |||
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 | |||
@@ -631,16 +618,14 @@ REG_OP(Deconvolution) | |||
*\n | |||
* The following are the supported data types and data formats:\n | |||
*\n | |||
| Tensor | x | out_backprop | y\n | |||
------------|---------|--------------|---------\n | |||
| Data Type | float16 | float16 | float16\n | |||
| |---------|--------------|---------\n | |||
| | float32 | float32 | float32\n | |||
| |---------|--------------|---------\n | |||
| | float64 | float64 | float64\n | |||
|-----------|---------|--------------|---------\n | |||
| Format | NCHW | NCHW | NCHW\n | |||
| | NHWC | NHWC | HWCN\n | |||
*\n | |||
| Tensor | x | out_backprop | y |\n | |||
|-----------|---------|--------------|---------|\n | |||
| Data Type | float16 | float16 | float16 |\n | |||
| | float32 | float32 | float32 |\n | |||
| | float64 | float64 | float64 |\n | |||
| Format | NCHW | NCHW | NCHW |\n | |||
| | NHWC | NHWC | HWCN |\n | |||
*\n | |||
* For float32 and float64 type of x and outbackprop, the actual calculation on the chip | |||
* is based on float16. | |||
@@ -662,30 +647,25 @@ REG_OP(Deconvolution) | |||
*\n | |||
* The following value range restrictions must be met:\n | |||
*\n | |||
| Name | Field | Scope\n | |||
-------------------|----------|--------------\n | |||
| x(fmap) | H | [1, 200000]\n | |||
| | W | [1, 4096]\n | |||
-------------------|----------|--------------\n | |||
| Filter Size | H | [1, 255]\n | |||
| | W | [1, 255]\n | |||
-------------------|----------|--------------\n | |||
| out_backprop | H | [1, 200000]\n | |||
| | W | [1, 4096]\n | |||
-------------------|----------|--------------\n | |||
| y | H | [1, 200000]\n | |||
| | W | [1, 4096]\n | |||
-------------------|----------|--------------\n | |||
| Stride | H | [1, 63]\n | |||
| | W | [1, 63]\n | |||
-------------------|----------|--------------\n | |||
| Padding | Top | [0, 255]\n | |||
| | Bottom | [0, 255]\n | |||
| | Left | [0, 255]\n | |||
| | Right | [0, 255]\n | |||
-------------------|----------|--------------\n | |||
| Dilation | H | [1, 255]\n | |||
| | W | [1, 255]\n | |||
*\n | |||
| Name | Field | Scope |\n | |||
|------------------|----------|--------------|\n | |||
| x(fmap) | H | [1, 200000] |\n | |||
| | W | [1, 4096] |\n | |||
| Filter Size | H | [1, 255] |\n | |||
| | W | [1, 255] |\n | |||
| out_backprop | H | [1, 200000] |\n | |||
| | W | [1, 4096] |\n | |||
| y | H | [1, 200000] |\n | |||
| | W | [1, 4096] |\n | |||
| Stride | H | [1, 63] |\n | |||
| | W | [1, 63] |\n | |||
| Padding | Top | [0, 255] |\n | |||
| | Bottom | [0, 255] |\n | |||
| | Left | [0, 255] |\n | |||
| | Right | [0, 255] |\n | |||
| Dilation | H | [1, 255] |\n | |||
| | W | [1, 255] |\n | |||
*\n | |||
*@par Outputs: | |||
* y: A Tensor. Has the same type as x, has the same format as filter_size. | |||
@@ -853,11 +833,11 @@ REG_OP(Conv2DBackpropFilterD) | |||
*@li Compatible with the Caffe operator 2D "Convolution". | |||
*/ | |||
REG_OP(Conv2D) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) | |||
.INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_BF16})) | |||
.INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_BF16})) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_BF16})) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(pads, ListInt) | |||
.ATTR(dilations, ListInt, {1, 1, 1, 1}) | |||
@@ -1441,14 +1421,13 @@ REG_OP(Conv3DTransposeD) | |||
*\n | |||
* The following are the supported data types and data formats:\n | |||
*\n | |||
| Tensor | x | filter | bias | y\n | |||
------------|---------|---------|---------|--------\n | |||
| Data Type | float16 | float16 | float16 | float16\n | |||
| |---------|---------|---------|--------\n | |||
| | int8 | int8 | int32 | int32\n | |||
------------|---------|---------|---------|--------\n | |||
| Format | NCHW | NCHW | ND | NCHW\n | |||
| | NHWC | HWCN | | NHWC\n | |||
*\n | |||
| Tensor | x | filter | bias | y |\n | |||
|-----------|---------|---------|---------|--------|\n | |||
| Data Type | float16 | float16 | float16 | float16|\n | |||
| | int8 | int8 | int32 | int32 |\n | |||
| Format | NCHW | NCHW | ND | NCHW |\n | |||
| | NHWC | HWCN | | NHWC |\n | |||
*\n | |||
* For int8, a dequant or requant operator must be followed. | |||
*\n | |||
@@ -1476,32 +1455,26 @@ REG_OP(Conv3DTransposeD) | |||
*\n | |||
* The following value range restrictions must be met:\n | |||
*\n | |||
| Name | Field | Scope\n | |||
-------------------|----------|--------------\n | |||
| input_size | H | [1, 200000]\n | |||
| | W | [1, 4096]\n | |||
-------------------|----------|--------------\n | |||
| x (out_backprop) | H*strideH| [1, 200000]\n | |||
| | W*strideW| [1, 4096]\n | |||
-------------------|----------|--------------\n | |||
| filter | H | [1, 255]\n | |||
| | W | [1, 255]\n | |||
-------------------|----------|--------------\n | |||
| y (fmap) | H | [1, 200000]\n | |||
| | W | [1, 4096]\n | |||
-------------------|----------|--------------\n | |||
| Stride | H | [1, 63]\n | |||
| | W | [1, 63]\n | |||
-------------------|----------|--------------\n | |||
| Padding | Top | [0, 255]\n | |||
| | Bottom | [0, 255]\n | |||
| | Left | [0, 255]\n | |||
| | Right | [0, 255]\n | |||
-------------------|----------|--------------\n | |||
| Dilation | H | [1, 255]\n | |||
| | W | [1, 255]\n | |||
-------------------|----------|--------------\n | |||
| Offset_x | | [-128, 127]\n | |||
*\n | |||
| Name | Field | Scope |\n | |||
|------------------|----------|--------------|\n | |||
| input_size | H | [1, 200000] |\n | |||
| | W | [1, 4096] |\n | |||
| x (out_backprop) | H*strideH| [1, 200000] |\n | |||
| | W*strideW| [1, 4096] |\n | |||
| filter | H | [1, 255] |\n | |||
| | W | [1, 255] |\n | |||
| y (fmap) | H | [1, 200000] |\n | |||
| | W | [1, 4096] |\n | |||
| Stride | H | [1, 63] |\n | |||
| | W | [1, 63] |\n | |||
| Padding | Top | [0, 255] |\n | |||
| | Bottom | [0, 255] |\n | |||
| | Left | [0, 255] |\n | |||
| | Right | [0, 255] |\n | |||
| Dilation | H | [1, 255] |\n | |||
| | W | [1, 255] |\n | |||
| Offset_x | | [-128, 127] |\n | |||
*\n | |||
* In Ascend910, fmap or out_backprop's H and W do not support 1 when | |||
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 | |||
@@ -135,7 +135,8 @@ REG_OP(CheckValid) | |||
* the value "4" refers to "x0", "x1", "y0", and "y1" . \n | |||
*@par Attributes: | |||
*mode: Computation mode, a character string with the value range of [iou, iof] . \n | |||
*@li mode: Computation mode, a character string with the value range of [iou, iof] | |||
*@li eps: An optional float, prevent division by 0, default value is 1.0 . \n | |||
*@par Outputs: | |||
*overlap: A 2D Tensor of type float16 or float32 with shape [M, N], specifying | |||
@@ -150,6 +151,7 @@ REG_OP(Iou) | |||
.INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(mode, String, "iou") | |||
.ATTR(eps, Float, 1.0) | |||
.OP_END_FACTORY_REG(Iou) | |||
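// Reference for a single box pair, boxes given as (x0, y0, x1, y1); the helper name is
// illustrative. Adding eps to the denominator is an assumption based on the "prevent division
// by 0" note above, and "iof" is assumed to divide by the gtboxes (foreground) area instead of the union.
#include <algorithm>
inline float IouRef(const float b[4], const float g[4], const float eps, const bool iof) {
  const float iw = std::max(0.0f, std::min(b[2], g[2]) - std::max(b[0], g[0]));
  const float ih = std::max(0.0f, std::min(b[3], g[3]) - std::max(b[1], g[1]));
  const float inter = iw * ih;
  const float area_b = (b[2] - b[0]) * (b[3] - b[1]);
  const float area_g = (g[2] - g[0]) * (g[3] - g[1]);
  const float denom = (iof ? area_g : (area_b + area_g - inter)) + eps;
  return inter / denom;
}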
/** | |||
@@ -205,7 +207,8 @@ the value "5" indicates the indexes of images where the ROIs are located, "x0", | |||
*@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image. | |||
*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical | |||
sampling frequency of each output. If this attribute is set to "0", the sampling frequency is | |||
equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . \n | |||
equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . | |||
*@li roi_end_mode: An optional attribute of type int, specifying the align mode .\n | |||
*@par Outputs: | |||
*xdiff: Gradient added to input "features". Has the same 5HD shape as input "features". | |||
@@ -220,6 +223,7 @@ REG_OP(ROIAlignGrad) | |||
.REQUIRED_ATTR(pooled_height, Int) | |||
.REQUIRED_ATTR(spatial_scale, Float) | |||
.ATTR(sample_num, Int, 2) | |||
.ATTR(roi_end_mode, Int, 1) | |||
.OP_END_FACTORY_REG(ROIAlignGrad) | |||
/** | |||
@@ -578,6 +582,172 @@ REG_OP(Yolo) | |||
.ATTR(background, Bool, false) | |||
.ATTR(softmaxtree, Bool, false) | |||
.OP_END_FACTORY_REG(Yolo) | |||
/** | |||
*@brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3 . \n | |||
*@par Inputs: | |||
*x: An NCHW tensor of type float16 or float32. The data is with shape (N, boxes*(coords+obj+classes), H, W), | |||
where, "obj" indicates the confidence of an object, and only one confidence is supported. Boxes are arranged | |||
as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n | |||
*@par Attributes: | |||
*@li boxes: A required int32, specifying the number of anchor boxes. Defaults to "5" for V2 or "3" for V3. | |||
*@li coords: An int32, specifying the number of parameters required for locating an object. The value is fixed at "4", corresponding to (x,y,w,h). | |||
*@li classes: An int32, specifying the number of prediction classes. Defaults to "80". The value range is [1, 1024]. | |||
*@li yolo_version: A string, specifying the YOLO version, either "V2" or "V3".Defaults to "V3" | |||
*@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". Defaults to "false". | |||
*@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". Defaults to "false". | |||
*@li softmaxtree: A bool, Fixed to False, defined in Lite, but not used. Defaults to "false" . \n | |||
*@par Outputs: | |||
*@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2], | |||
* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box. | |||
*@li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width *2+32, 32)/2], | |||
* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence. | |||
*@li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width *2+32, 32)/2], | |||
* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes . \n | |||
*@attention Constraints: | |||
*@li This operator applies to YOLO v2,v3 and v5 networks. | |||
*@li The succeeding layer of the Yolo operator must be operator Yolov5DetectionOutput. | |||
*@par Third-party framework compatibility | |||
* It is a custom operator. | |||
*/ | |||
REG_OP(YoloPreDetection) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(boxes, Int, 3) | |||
.ATTR(coords, Int, 4) | |||
.ATTR(classes, Int, 80) | |||
.ATTR(yolo_version, String, "V5") | |||
.ATTR(softmax, Bool, false) | |||
.ATTR(background, Bool, false) | |||
.ATTR(softmaxtree, Bool, false) | |||
.OP_END_FACTORY_REG(YoloPreDetection) | |||
/** | |||
*@brief Performs YOLO V5 detection . \n | |||
*@par Inputs: | |||
*Ten inputs, including: | |||
*@li Operator Yolov5DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n | |||
There are three Yolo operators at Yolov5DetectionOutput's preceding layer on Yolo v5. For details, see the description of operator Yolo. | |||
*@li img_info: A float16 or float32, describing the image information including the required image height and width \n | |||
* and the actual image height and width. | |||
*@par Attributes: | |||
*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" | |||
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. | |||
*@li coords: Specifies the number of coordinate parameters. Must be 4. | |||
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80]. | |||
*@li relative: An optional bool. Defaults to and must be "true". | |||
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering (which is the output "obj" of operator Yolo). The value range is [0.0, 1.0]. | |||
*@li post_nms_topn: An optional int32. This attribute is reserved. | |||
*@li score_threshold: A required float, specifying the class score threshold for box filtering (which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. | |||
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n | |||
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". | |||
*@par Outputs: | |||
*@li box_out: A tensor of type float16 or float32 with shape [batch, 6, post_nms_topn] (out_box_dim == 3) or [batch, 6*post_nms_topn] (out_box_dim == 2), | |||
* describing each output box. In the output shape, 6 stands for x1, y1, x2, y2, score, and label (class). The number of boxes actually output is given by box_out_num. | |||
*@li box_out_num: A tensor of type int32 with shape [batch, 8], specifying the number of output boxes. | |||
* Only the first of the 8 numbers in each batch is valid; it is the number of valid boxes in that batch, which is at most 1024. A parsing sketch follows the registration below. | |||
*@attention Constraints:\n | |||
*@li This operator applies only to the YOLO v5 network. | |||
*@li The preceding layer of operator Yolov5DetectionOutput must be three Yolo operators. | |||
*@see Yolo() | |||
*@par Third-party framework compatibility | |||
* It is a custom operator. It has no corresponding operator in Caffe. | |||
*/ | |||
REG_OP(YoloV5DetectionOutput) | |||
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.REQUIRED_ATTR(biases, ListFloat) | |||
.ATTR(boxes, Int, 3) | |||
.ATTR(coords, Int, 4) | |||
.ATTR(classes, Int, 80) | |||
.ATTR(relative, Bool, true) | |||
.ATTR(obj_threshold, Float, 0.5) | |||
.ATTR(post_nms_topn, Int, 512) | |||
.ATTR(score_threshold, Float, 0.5) | |||
.ATTR(iou_threshold, Float, 0.45) | |||
.ATTR(pre_nms_topn, Int, 512) | |||
.ATTR(N, Int, 10) | |||
.ATTR(resize_origin_img_to_net, Bool, false) | |||
.ATTR(out_box_dim, Int, 3) | |||
.ATTR(alpha, Float, 2.0) | |||
.OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(box_out_num, TensorType({DT_INT32})) | |||
.OP_END_FACTORY_REG(YoloV5DetectionOutput) | |||
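As a hedged illustration of how the two outputs described above fit together, the sketch below walks the valid boxes of one batch on the host. It assumes out_box_dim == 3, row-major float buffers, and the layout documented above (6 fields per box, only the first of the 8 box_out_num entries valid); the function name and buffer handling are hypothetical:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Prints the valid boxes of batch "batchIdx" from box_out ([batch, 6, post_nms_topn])
// and box_out_num ([batch, 8], only the first of the 8 values per batch is valid).
void PrintBoxes(const std::vector<float> &boxOut, const std::vector<int32_t> &boxOutNum,
                int32_t batchIdx, int32_t postNmsTopn) {
  const int32_t validNum = boxOutNum[static_cast<size_t>(batchIdx) * 8];
  const float *base = boxOut.data() + static_cast<size_t>(batchIdx) * 6 * postNmsTopn;
  for (int32_t i = 0; i < validNum; ++i) {
    // Field-major layout assumed: x1, y1, x2, y2, score, label, each of length post_nms_topn.
    std::cout << "box " << i << ": x1=" << base[0 * postNmsTopn + i]
              << " y1=" << base[1 * postNmsTopn + i]
              << " x2=" << base[2 * postNmsTopn + i]
              << " y2=" << base[3 * postNmsTopn + i]
              << " score=" << base[4 * postNmsTopn + i]
              << " label=" << base[5 * postNmsTopn + i] << std::endl;
  }
}
```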
/** | |||
*@brief Performs YOLO V5 detection. | |||
*@par Inputs: | |||
*Sixteen inputs, including: | |||
*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v5) are used as the inputs of operator Yolov5DetectionOutput. | |||
* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. | |||
*@li imginfo: A float16, describing the image information including the required image height and width | |||
* and the actual image height and width. | |||
*@li windex: An index tensor with shape [height, width]. Has the same type as the inputs. | |||
* [[0,1,2...(width-1)],[0,1,2...(width-1)]...[0,1,2...(width-1)]], consisting of height groups of [0, 1, 2...(width-1)], | |||
* is formed for each of the three Yolo outputs. It's a dynamic input (see the sketch after the registration below). \n | |||
*@li hindex: An index tensor with shape [height, width]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for each of the three Yolo outputs. \n | |||
*@par Attributes: | |||
*@li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" | |||
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. | |||
*@li coords: Specifies the number of coordinate parameters. Must be 4. | |||
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80]. | |||
*@li relative: An optional bool. Defaults to and must be "true". | |||
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, applied to the output "obj" of operator Yolo. The value range is [0.0, 1.0]. | |||
*@li post_nms_topn: An optional int32. This attribute is reserved. | |||
*@li score_threshold: A required float, specifying the class score threshold for box filtering, applied to the output "class" of operator Yolo. The value range is [0.0, 1.0]. | |||
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. | |||
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". | |||
* | |||
*@par Outputs: | |||
*@li box_out: A tensor of type float16 or float32 with shape [batch, 6, post_nms_topn] (out_box_dim == 3) or [batch, 6*post_nms_topn] (out_box_dim == 2), | |||
* describing the information of each output box. | |||
* In the output shape, 6 stands for x1, y1, x2, y2, score, and label (class). The number of boxes actually output is given by box_out_num. | |||
*@li box_out_num: A tensor of type int32 with shape [batch, 8], specifying the number of output boxes. | |||
* Only the first of the 8 numbers in each batch is valid; it is the number of valid boxes in that batch, which is at most 1024. | |||
* | |||
*@attention Constraints: | |||
*@li This operator applies only to the YOLO v5 network. | |||
*@li The preceding layer of operator Yolov5DetectionOutput must be three Yolo operators. | |||
*@see Yolo() | |||
*@par Third-party framework compatibility | |||
* It is a custom operator. | |||
*/ | |||
REG_OP(YoloV5DetectionOutputD) | |||
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.REQUIRED_ATTR(biases, ListFloat) | |||
.ATTR(boxes, Int, 3) | |||
.ATTR(coords, Int, 4) | |||
.ATTR(classes, Int, 80) | |||
.ATTR(relative, Bool, true) | |||
.ATTR(obj_threshold, Float, 0.5) | |||
.ATTR(post_nms_topn, Int, 512) | |||
.ATTR(score_threshold, Float, 0.5) | |||
.ATTR(iou_threshold, Float, 0.45) | |||
.ATTR(pre_nms_topn, Int, 512) | |||
.ATTR(N, Int, 10) | |||
.ATTR(resize_origin_img_to_net, Bool, false) | |||
.ATTR(out_box_dim, Int, 3) | |||
.ATTR(alpha, Float, 2.0) | |||
.OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(box_out_num, TensorType({DT_INT32})) | |||
.OP_END_FACTORY_REG(YoloV5DetectionOutputD) | |||
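The windex and hindex inputs described above are plain index grids; a minimal host-side sketch of how such grids could be generated (assuming row-major [height, width] float buffers; one pair of grids would be built per Yolo output):

```cpp
#include <vector>

// windex[h][w] = w (column index), hindex[h][w] = h (row index), both [height, width].
void BuildIndexGrids(int height, int width,
                     std::vector<float> &windex, std::vector<float> &hindex) {
  windex.assign(static_cast<size_t>(height) * width, 0.0f);
  hindex.assign(static_cast<size_t>(height) * width, 0.0f);
  for (int h = 0; h < height; ++h) {
    for (int w = 0; w < width; ++w) {
      windex[static_cast<size_t>(h) * width + w] = static_cast<float>(w);
      hindex[static_cast<size_t>(h) * width + w] = static_cast<float>(h);
    }
  }
}
```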
/** | |||
*@brief Performs YOLO V2 detection . \n | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved. | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -2645,6 +2645,19 @@ REG_OP(SparseApplyAdadeltaD) | |||
REG_OP(AtomicAddrClean) | |||
.ATTR(automic_add_mem_size, ListInt, {}) | |||
.OP_END_FACTORY_REG(AtomicAddrClean) | |||
/** | |||
*@brief Clean memory of workspace list . \n | |||
*@par Attributes: | |||
* @li workspace_size: sizes of workspaces . \n | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(DynamicAtomicAddrClean) | |||
.ATTR(automic_add_mem_size, ListInt, {}) | |||
.OP_END_FACTORY_REG(DynamicAtomicAddrClean) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_TRAINING_OPS_H_ |
@@ -59,6 +59,25 @@ REG_OP(HardSwish) | |||
.OP_END_FACTORY_REG(HardSwish) | |||
/** | |||
*@brief Computes the gradient for the hard_swish of "x" . \n | |||
* @par Inputs: | |||
*Two inputs, including: | |||
* @li grad: A Tensor. Must be one of the following types: float16, float32 | |||
* @li x: A Tensor of the same type as "grad" . \n | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as "grad". | |||
* @par Third-party framework compatibility | |||
* Compatible with the Torch operator HardSwishGrad. | |||
*/ | |||
REG_OP(HardSwishGrad) | |||
.INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(HardSwishGrad) | |||
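For reference, the element-wise gradient this operator computes can be sketched as below, assuming hard_swish(x) = x * relu6(x + 3) / 6 (reference math only, not the kernel implementation):

```cpp
#include <cstddef>

// y[i] = grad[i] * d(hard_swish)/dx evaluated at x[i].
void HardSwishGradRef(const float *grad, const float *x, float *y, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    float dx;
    if (x[i] <= -3.0f) {
      dx = 0.0f;            // hard_swish is constant 0 below -3
    } else if (x[i] >= 3.0f) {
      dx = 1.0f;            // hard_swish equals x above 3
    } else {
      dx = (2.0f * x[i] + 3.0f) / 6.0f;  // derivative of x * (x + 3) / 6
    }
    y[i] = grad[i] * dx;
  }
}
```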
/** | |||
*@brief Computes the Swish of "x" . \n | |||
*@par Inputs: | |||
@@ -81,6 +100,29 @@ REG_OP(Swish) | |||
.OP_END_FACTORY_REG(Swish) | |||
/** | |||
*@brief Computes the gradient for the Swish of "x" . \n | |||
*@par Inputs: | |||
*Three inputs, including: | |||
* @li grad: A Tensor. Must be one of the following types: float16, float32 | |||
* @li x: A Tensor of the same type as "grad". | |||
* @li y: A Tensor of the same type as "grad" . \n | |||
* @par Attributes: | |||
* scale: An optional scalar of type float. Defaults to 1.0. \n | |||
*@par Outputs: | |||
*grad_x: A Tensor. Has the same type as "grad". | |||
*@par Third-party framework compatibility | |||
*Compatible with the Torch operator SwishGrad | |||
*/ | |||
REG_OP(SwishGrad) | |||
.INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(grad_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(scale, Float, 1.0) | |||
.OP_END_FACTORY_REG(SwishGrad) | |||
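A hedged element-wise reference for the gradient above, assuming swish(x) = x * sigmoid(scale * x); the forward output "y" is not strictly needed in this formulation, so it is omitted here:

```cpp
#include <cmath>
#include <cstddef>

// d(swish)/dx = sigmoid(s*x) + s*x * sigmoid(s*x) * (1 - sigmoid(s*x)), with s = scale.
void SwishGradRef(const float *grad, const float *x, float *gradX, size_t n, float scale) {
  for (size_t i = 0; i < n; ++i) {
    const float sig = 1.0f / (1.0f + std::exp(-scale * x[i]));
    gradX[i] = grad[i] * (sig + scale * x[i] * sig * (1.0f - sig));
  }
}
```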
/** | |||
*@brief Computes the gradient for the gelu of "x" . \n | |||
*@par Inputs: | |||
@@ -274,6 +274,38 @@ REG_OP(PadV3) | |||
.ATTR(mode, String, "constant") | |||
.ATTR(paddings_contiguous, Bool, true) | |||
.OP_END_FACTORY_REG(PadV3) | |||
/** | |||
*@brief Calculates the gradient of PadV3. | |||
*@par Inputs: | |||
*Two inputs, including: | |||
* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32, | |||
* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, | |||
* complex128, uint32, uint64. | |||
* @li paddings: A Tensor of type int32 or int64. | |||
*@par Attributes: | |||
* @li mode: An optional string. Defaults to "reflect". Indicates the padding mode; | |||
* supports "reflect" and "edge". | |||
* @li paddings_contiguous: An optional bool. Defaults to true. | |||
* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...]. | |||
* If false, paddings is arranged as [[begin0, begin1], ...] followed by [[end0, end1], ...] (see the illustration after the registration below). | |||
*@par Outputs: | |||
*y: A Tensor of the same type as "x". | |||
*@par Third-party framework compatibility: | |||
* Compatible with ONNX operator PadGrad. | |||
*/ | |||
REG_OP(PadV3Grad) | |||
.INPUT(x, TensorType::BasicType()) | |||
.INPUT(paddings, TensorType::IndexNumberType()) | |||
.OUTPUT(y, TensorType::BasicType()) | |||
.ATTR(mode, String, "reflect") | |||
.ATTR(paddings_contiguous, Bool, true) | |||
.OP_END_FACTORY_REG(PadV3Grad) | |||
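To make the paddings_contiguous attribute concrete, the two flattened layouts for a 2-D tensor would look as follows (illustration only, values are arbitrary):

```cpp
#include <cstdint>

// Pads (begin0, end0) = (1, 2) and (begin1, end1) = (3, 4):
// paddings_contiguous == true  -> [[begin0, end0], [begin1, end1]] flattened below
// paddings_contiguous == false -> [[begin0, begin1], [end0, end1]] flattened below
const int32_t kPaddingsContiguousTrue[4]  = {1, 2, 3, 4};
const int32_t kPaddingsContiguousFalse[4] = {1, 3, 2, 4};
```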
/** | |||
*@brief Pads a tensor. | |||
@@ -685,6 +685,24 @@ REG_OP(Uniform) | |||
.ATTR(from, Float, 0.0) | |||
.ATTR(to, Float, 1.0) | |||
.OP_END_FACTORY_REG(Uniform) | |||
} // namespace ge | |||
/** | |||
*@brief Outputs integers consisting of 0 and 1, used for lstm etc. \n | |||
*@par Attributes: | |||
* @li time_step: A required int, specifying the number of time steps. | |||
* @li batch_size: A required int, specifying the batch size. | |||
*@par Outputs: | |||
*y: A Tensor of type float16 or float, 2-D, with shape [time_step, batch_size]. \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the Caffe operator ContinuationIndicator. | |||
*/ | |||
REG_OP(ContinuationIndicator) | |||
.REQUIRED_ATTR(time_step, Int) | |||
.REQUIRED_ATTR(batch_size, Int) | |||
.OUTPUT(y, TensorType({DT_FLOAT})) | |||
.OP_END_FACTORY_REG(ContinuationIndicator) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ |
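The registration above does not spell out the values the indicator takes; in Caffe, ContinuationIndicator conventionally emits 0 for the first time step of every sequence and 1 afterwards. A host-side sketch under that assumption:

```cpp
#include <vector>

// Builds a [time_step, batch_size] indicator: 0 for t == 0, 1 otherwise
// (assumed Caffe-style semantics, not stated in the operator description above).
std::vector<float> BuildContinuationIndicator(int timeStep, int batchSize) {
  std::vector<float> y(static_cast<size_t>(timeStep) * batchSize, 1.0f);
  for (int b = 0; b < batchSize; ++b) {
    y[b] = 0.0f;  // first row (t == 0) is all zeros
  }
  return y;
}
```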
@@ -1275,7 +1275,7 @@ REG_OP(ReduceStd) | |||
* @par Attributes: | |||
* Three Attributes, including: | |||
* Five Attributes, including: | |||
* @li dim: An optional listint, Defaults to "None". \n | |||
* @li unbiased: An optional bool. Defaults to "True". | |||
* If "True", Use Bessel Correction. | |||
@@ -1283,9 +1283,14 @@ REG_OP(ReduceStd) | |||
* @li keepdim: An optional bool. Defaults to "False". | |||
* If "True", Keep the original tensor dimension. | |||
* If "False", Do not keep the original tensor dimension. \n | |||
* @li invert: An optional bool, Defaults to "False". | |||
* If "True", the output is inverse of variance. | |||
* If "False", the output is variance. | |||
* @li epsilon: An optional float. Defaults to 0.001. | |||
* Used to prevent division by 0. | |||
* @par Outputs: | |||
* @li y: A Tensor. It's the std of X. Has the same type as "x". | |||
* @li y: A Tensor. It's the variance of "x", or the reciprocal of the variance when "invert" is true. Has the same type as "x". | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator ReduceStdWithMean. | |||
@@ -1297,6 +1302,8 @@ REG_OP(ReduceStdWithMean) | |||
.ATTR(dim, ListInt, {}) | |||
.ATTR(unbiased, Bool, true) | |||
.ATTR(keepdim, Bool, false) | |||
.ATTR(invert, Bool, false) | |||
.ATTR(epsilon, Float, 0.001) | |||
.OP_END_FACTORY_REG(ReduceStdWithMean) | |||
} //namespace ge | |||
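A hedged 1-D reference for the invert and epsilon attributes added above. It assumes the reduction covers the whole input and that epsilon enters as 1 / (variance + epsilon); the real operator also takes a mean input and reduces over the dim attribute:

```cpp
#include <cstddef>

// Variance of x given its mean, optionally Bessel-corrected (unbiased),
// optionally returned as 1 / (var + epsilon) when invert is set.
float ReduceVarRef(const float *x, size_t n, float mean,
                   bool unbiased, bool invert, float epsilon) {
  float sum = 0.0f;
  for (size_t i = 0; i < n; ++i) {
    const float d = x[i] - mean;
    sum += d * d;
  }
  const float denom = unbiased ? static_cast<float>(n - 1) : static_cast<float>(n);
  const float var = sum / denom;
  return invert ? 1.0f / (var + epsilon) : var;
}
```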
@@ -822,7 +822,7 @@ REG_OP(DynamicGRU) | |||
*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||
*@li bias_input:Must be one of the following types: float16, float32. The format must be ND. | |||
*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. | |||
*@li seq_length:Must be one of the following types: int32. The format must be ND. | |||
*@li seq_length:Must be one of the following types: float16 in FRACTAL_NZ and int32 in ND. | |||
*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@par Attributes: | |||
@@ -852,7 +852,7 @@ REG_OP(DynamicGRUV2) | |||
.INPUT(weight_hidden, TensorType({DT_FLOAT16})) | |||
.OPTIONAL_INPUT(bias_input, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) | |||
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16})) | |||
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
@@ -880,7 +880,7 @@ REG_OP(DynamicGRUV2) | |||
*@li x_weight_input:Must be one of the following types: float32. The format must be FRACTAL_NZ. | |||
*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||
*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. | |||
*@li seq_length:Must be one of the following types: int32. The format must be ND. | |||
*@li seq_length:Must be one of the following types: float16 in FRACTAL_NZ and int32 in ND. | |||
*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@par Attributes: | |||
@@ -913,7 +913,7 @@ REG_OP(DynamicGRUV2Hidden) | |||
.INPUT(x_weight_input, TensorType({DT_FLOAT32})) | |||
.INPUT(weight_hidden, TensorType({DT_FLOAT16})) | |||
.OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32})) | |||
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16})) | |||
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
@@ -1050,6 +1050,50 @@ REG_OP(GRUV2HiddenGradCell) | |||
.OP_END_FACTORY_REG(GRUV2HiddenGradCell) | |||
/** | |||
*@brief: DynamicGRUCellGrad calculation. | |||
*@par Inputs: | |||
*Ten inputs: \n | |||
*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li t_state:A 1D Tensor. Must be one of the following types: int32. The format must be ND. | |||
*@par Attributes: | |||
*gate_order: A string identifying the gate order in weight and bias. Defaults to "zrh"; "rzh" is also supported. | |||
*@par Outputs: | |||
*three outputs: \n | |||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(DynamicGRUCellGrad) | |||
.INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(update, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(new, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(t_state, TensorType({DT_INT32})) | |||
.OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(gate_order, String, "zrh") | |||
.OP_END_FACTORY_REG(DynamicGRUCellGrad) | |||
/** | |||
* @brief Calculates the reversed outputs of the function "embedding". \n | |||
* @par Inputs: | |||
@@ -1137,8 +1181,8 @@ REG_OP(CommonLSTM) | |||
* | |||
* @par Inputs: | |||
* @li seq_length: A 1D Tensor. Must be one of the following types: int32. Record the current length of each batch. [batch_size]. | |||
* @li b: A 1D Tensor. Must be one of the following types: fp16/fp32. Record the hidden_size. [4 * hidden_size]. | |||
* @li x: A 3D Tensor. Must be one of the following types: fp16/fp32. Record the num_step/batch_size/input_size. [num_step, batch_size, input_size]. | |||
* @li hidden_size: A required attribute of type int32, specifying the hidden size. \n | |||
* | |||
* @par Outputs: | |||
* seq_mask: A 3D Tensor. Must be one of the following types: fp16/fp32. with the shape of [num_step, batch_size, hidden_size]. And has the same type as "b" \n | |||
@@ -1148,8 +1192,8 @@ REG_OP(CommonLSTM) | |||
*/ | |||
REG_OP(RnnGenMaskV2) | |||
.INPUT(seq_length, TensorType({DT_INT32})) | |||
.INPUT(b, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.REQUIRED_ATTR(hidden_size, Int) | |||
.OUTPUT(seq_mask, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(RnnGenMaskV2) | |||
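A host-side sketch of the sequence mask described above, assuming valid steps are filled with 1.0 and padded steps with 0.0 (the exact fill values are an assumption):

```cpp
#include <cstdint>
#include <vector>

// seq_mask has shape [num_step, batch_size, hidden_size]; 1.0 while t < seq_length[b].
std::vector<float> BuildSeqMask(const std::vector<int32_t> &seqLength, int numStep, int hiddenSize) {
  const int batchSize = static_cast<int>(seqLength.size());
  std::vector<float> mask(static_cast<size_t>(numStep) * batchSize * hiddenSize, 0.0f);
  for (int t = 0; t < numStep; ++t) {
    for (int b = 0; b < batchSize; ++b) {
      if (t < seqLength[b]) {
        const size_t base = (static_cast<size_t>(t) * batchSize + b) * hiddenSize;
        for (int h = 0; h < hiddenSize; ++h) {
          mask[base + h] = 1.0f;
        }
      }
    }
  }
  return mask;
}
```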
@@ -2408,6 +2408,40 @@ REG_OP(TopKPQDistanceMerge) | |||
.OUTPUT(topk_index, TensorType({DT_INT32})) | |||
.REQUIRED_ATTR(k, Int) | |||
.OP_END_FACTORY_REG(TopKPQDistanceMerge) | |||
/** | |||
*@brief Extracts a strided slice of a tensor. Roughly speaking, this op | |||
extracts a slice of size (end-begin)/stride from the given input tensor. | |||
Starting at the location specified by "begin", the slice continues by | |||
adding "stride" to the index until the index reaches "end" in every sliced dimension. | |||
*@par Inputs: | |||
*Five inputs, including: | |||
* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, | |||
* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, | |||
* complex128, float16, uint32, uint64. | |||
* @li begin: A Tensor of type int32 or int64, for the index of the first value to select . \n | |||
* @li end: A Tensor of type int32 or int64, for the index of the last value to select . \n | |||
* @li strides: A Tensor of type int32 or int64, for the increment . \n | |||
* @li axes: An optional Tensor of type int32 or int64, specifying the axes to which "begin", "end" and "strides" apply . \n | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as "x" . \n | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(StridedSliceV3) | |||
.INPUT(x, TensorType::BasicType()) | |||
.INPUT(begin, TensorType::IndexNumberType()) | |||
.INPUT(end, TensorType::IndexNumberType()) | |||
.OPTIONAL_INPUT(axes, TensorType::IndexNumberType()) | |||
.OPTIONAL_INPUT(strides, TensorType::IndexNumberType()) | |||
.OUTPUT(y, TensorType::BasicType()) | |||
.OP_END_FACTORY_REG(StridedSliceV3) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ |
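The slicing rule described for StridedSliceV3 can be illustrated with a 1-D reference (positive strides only; this mirrors the (end - begin) / stride wording above and is not the operator's implementation):

```cpp
#include <vector>

// Picks x[begin], x[begin + stride], ... while the index stays below end.
std::vector<int> StridedSlice1D(const std::vector<int> &x, int begin, int end, int stride) {
  std::vector<int> y;
  for (int i = begin; i < end; i += stride) {
    y.push_back(x[i]);
  }
  return y;
}

// Example: x = {0, 1, 2, 3, 4, 5, 6, 7}, begin = 1, end = 7, stride = 2 -> y = {1, 3, 5}.
```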
@@ -368,8 +368,9 @@ REG_OP(SpaceToDepth) | |||
* complex128, uint32, uint64 | |||
*@par Attributes: | |||
*Two attributes, including: | |||
*Three attributes, including: | |||
* @li block_size: An int >= 2, specifying the size of the spatial block. | |||
* @li mode: An optional string, specifying the mode. Defaults to "DCR". | |||
* @li data_format: An optional string, specifying the data format. Defaults to "NHWC" . \n | |||
*@par Outputs: | |||
@@ -382,6 +383,7 @@ REG_OP(DepthToSpace) | |||
.INPUT(x, TensorType::BasicType()) | |||
.OUTPUT(y, TensorType::BasicType()) | |||
.REQUIRED_ATTR(block_size, Int) | |||
.ATTR(mode, String, "DCR") | |||
.ATTR(data_format, String, "NHWC") | |||
.OP_END_FACTORY_REG(DepthToSpace) | |||
@@ -845,7 +847,11 @@ with the same setting for this option. Default: False \n | |||
selected indices from the boxes tensor, where M <= max_output_size. \n | |||
*@attention Constraints: | |||
*Input theta must be float16 or float, output_size must be int32 type . \n | |||
*Input theta must be float16 or float, output_size must be int32 type . | |||
The current AI Core implementation of the AffineGrid operator adopts | |||
BatchMatMul's FP16 fusion operator scheme, so accuracy decreases | |||
when the theta range exceeds [-10, 10]. If the model requires | |||
high AffineGrid accuracy, the AI CPU implementation is recommended. \n | |||
*@par Third-party framework compatibility | |||
*Compatible with Pytorch affine_grid operator. | |||
@@ -34,7 +34,7 @@ namespace ge { | |||
* @li bucket_list: A Tensor. Must be one of the following types: int32, int64. | |||
* | |||
* @par Outputs: | |||
* @li adc_tables: A Tensor. Must be one of the following types: float16, float32. | |||
* adc_tables: A Tensor. Must be one of the following types: float16, float32. | |||
*/ | |||
REG_OP(GenADC) | |||
.INPUT(query, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
@@ -43,6 +43,87 @@ REG_OP(GenADC) | |||
.INPUT(bucket_list, TensorType({DT_INT32, DT_INT64})) | |||
.OUTPUT(adc_tables, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(GenADC) | |||
/** | |||
* @brief Finds values and indices of the "k" largest or least elements for the last dimension. \n | |||
* | |||
* @par Inputs: | |||
* Dynamic inputs, including: | |||
* @li actual_count: A Tensor of type int32, the actual number of pq_distance. | |||
* @li pq_distance: A Tensor, will be updated after calculation. Must be one of the following types: float32, float16. | |||
* @li grouped_extreme_distance: A Tensor, the extremum in each group. Must be one of the following types: float32, float16. | |||
* @li pq_index: A Tensor of type int32, index corresponding to pq_distance. | |||
* @li pq_ivf: A Tensor of type int32, the bucket number corresponding to pq_distance. | |||
* | |||
* @par Attributes: | |||
* @li order: A string, indicates the sorting method of topk_pq_distance. \n | |||
* @li k: An int, the number of largest or smallest values to select. \n | |||
* @li group_size: Int, the group size of the extremum. \n | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(TopKPQDistance) | |||
.DYNAMIC_INPUT(actual_count, TensorType({DT_INT32})) | |||
.DYNAMIC_INPUT(pq_distance, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.DYNAMIC_INPUT(grouped_extreme_distance, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.DYNAMIC_INPUT(pq_ivf, TensorType({DT_INT32})) | |||
.DYNAMIC_INPUT(pq_index, TensorType({DT_INT32})) | |||
.OUTPUT(topk_distance, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(topk_ivf, TensorType({DT_INT32})) | |||
.OUTPUT(topk_index, TensorType({DT_INT32})) | |||
.ATTR(order, String, "ASC") | |||
.ATTR(k, Int, 0) | |||
.ATTR(group_size, Int, 0) | |||
.OP_END_FACTORY_REG(TopKPQDistance) | |||
/** | |||
* @brief Calculate PQ distance. \n | |||
* | |||
* @par Inputs: | |||
* Six inputs, including: | |||
* @li ivf: A Tensor, dtype is uint8. | |||
* @li bucket_list: A Tensor, dtype is int32 or int64. | |||
* @li bucket_base_distance: A Tensor, dtype is float16 or float32. | |||
* @li bucket_limits: A Tensor, dtype is int32. | |||
* @li bucket_offsets: A Tensor, dtype is int64. | |||
* @li adc_tables: A Tensor, dtype is float16 or float32. \n | |||
* | |||
* @par Outputs: | |||
* Five outputs, including: | |||
* @li actual_count: A Tensor, dtype is int32, the first element means the length of processed ivf. | |||
* @li pq_distance: A Tensor, dtype is float16. | |||
* @li grouped_extreme_distance: A Tensor, dtype is float16. | |||
* @li pq_ivf: A Tensor, dtype is int32. | |||
* @li pq_index: A Tensor, dtype is int32. \n | |||
* | |||
* @par Attributes: | |||
* Five attributes, including: | |||
* @li group_size: A Scalar, indicates the group size when compute grouped_extreme_distance. | |||
* @li total_limit: A Scalar, indicates the total length of the outputs. | |||
* @li extreme_mode: A Scalar, indicates the type of extremum, 0 means minimum, and 1 means maximum. | |||
* @li split_count: A Scalar. | |||
* @li split_index: A Scalar. \n | |||
* | |||
*/ | |||
REG_OP(ScanPQCodes) | |||
.INPUT(ivf, TensorType({DT_UINT8})) | |||
.INPUT(bucket_list, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(bucket_base_distance, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(bucket_limits, TensorType({DT_INT32})) | |||
.INPUT(bucket_offsets, TensorType({DT_INT64})) | |||
.INPUT(adc_tables, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(actual_count, TensorType({DT_INT32})) | |||
.OUTPUT(pq_distance, TensorType({DT_FLOAT16})) | |||
.OUTPUT(grouped_extreme_distance, TensorType({DT_FLOAT16})) | |||
.OUTPUT(pq_ivf, TensorType({DT_INT32})) | |||
.OUTPUT(pq_index, TensorType({DT_INT32})) | |||
.REQUIRED_ATTR(total_limit, Int) | |||
.ATTR(group_size, Int, 64) | |||
.ATTR(extreme_mode, Int, 0) | |||
.ATTR(split_count, Int, 1) | |||
.ATTR(split_index, Int, 0) | |||
.OP_END_FACTORY_REG(ScanPQCodes) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_ |
@@ -42,7 +42,7 @@ static const int32_t RT_ERROR_NONE = 0; // success | |||
*/ | |||
typedef enum tagRtDeviceMode { | |||
RT_DEVICE_MODE_SINGLE_DIE = 0, | |||
RT_DEVICE_MODE_MULTI_DIE = 1, | |||
RT_DEVICE_MODE_MULTI_DIE, | |||
RT_DEVICE_MODE_RESERVED | |||
} rtDeviceMode; | |||
@@ -178,7 +178,7 @@ RTS_API rtError_t rtProfilerInit(const char *profDir, const char *address, const | |||
* @ingroup profiling_base | |||
* @brief config rts profiler. | |||
*/ | |||
RTS_API rtError_t rtProfilerConfig(uint16_t type); | |||
RTS_API rtError_t rtProfilerConfig(uint16_t profConfig); | |||
/** | |||
* @ingroup profiling_base | |||
@@ -251,18 +251,6 @@ RTS_API rtError_t rtProfRegisterCtrlCallback(uint32_t moduleId, rtProfCtrlHandle | |||
/** | |||
* @ingroup dvrt_base | |||
* @brief Returns the last error from a runtime call. | |||
*/ | |||
RTS_API rtError_t rtGetLastError(); | |||
/** | |||
* @ingroup dvrt_base | |||
* @brief Returns the last error from a runtime call. | |||
*/ | |||
RTS_API rtError_t rtPeekAtLastError(); | |||
/** | |||
* @ingroup dvrt_base | |||
* @brief register callback for error code | |||
* @param [out] NA | |||
* @return RT_ERROR_NONE for ok | |||
@@ -14,8 +14,8 @@ | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_CONFIG_H__ | |||
#define __CCE_RUNTIME_CONFIG_H__ | |||
#ifndef CCE_RUNTIME_CONFIG_H | |||
#define CCE_RUNTIME_CONFIG_H | |||
#include "base.h" | |||
@@ -23,28 +23,28 @@ | |||
extern "C" { | |||
#endif | |||
#define PLAT_COMBINE(arch, chip, ver) ((arch << 16) | (chip << 8) | (ver)) | |||
#define PLAT_GET_ARCH(type) ((type >> 16) & 0xffff) | |||
#define PLAT_GET_CHIP(type) ((type >> 8) & 0xff) | |||
#define PLAT_GET_VER(type) (type & 0xff) | |||
#define PLAT_COMBINE(arch, chip, ver) (((arch) << 16U) | ((chip) << 8U) | (ver)) | |||
#define PLAT_GET_ARCH(type) (((type) >> 16U) & 0xffffU) | |||
#define PLAT_GET_CHIP(type) (((type) >> 8U) & 0xffU) | |||
#define PLAT_GET_VER(type) ((type) & 0xffU) | |||
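The platform macros above pack (arch, chip, version) into one 32-bit value; a small usage sketch (the include path is an assumption, the macros and enum values come from this header):

```cpp
#include <cstdint>
#include "runtime/config.h"  // assumed include path for this header

uint32_t DescribePlatform() {
  // Pack arch/chip/version into one value, then unpack the fields again.
  const uint32_t plat = PLAT_COMBINE(ARCH_V200, CHIP_CLOUD_V2, VER_CS);
  const uint32_t arch = PLAT_GET_ARCH(plat);  // == ARCH_V200
  const uint32_t chip = PLAT_GET_CHIP(plat);  // == CHIP_CLOUD_V2
  const uint32_t ver  = PLAT_GET_VER(plat);   // == VER_CS
  return (arch << 16U) | (chip << 8U) | ver;  // same value as plat
}
```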
typedef enum tagRtArchType { | |||
ARCH_BEGIN = 0, | |||
ARCH_V100 = ARCH_BEGIN, | |||
ARCH_V200, | |||
ARCH_END, | |||
ARCH_V200 = 1, | |||
ARCH_END = 2, | |||
} rtArchType_t; | |||
typedef enum tagRtChipType { | |||
CHIP_BEGIN = 0, | |||
CHIP_MINI = CHIP_BEGIN, | |||
CHIP_CLOUD, | |||
CHIP_MDC, | |||
CHIP_LHISI, | |||
CHIP_DC, | |||
CHIP_CLOUD_V2, | |||
CHIP_NO_DEVICE, | |||
CHIP_END, | |||
CHIP_CLOUD = 1, | |||
CHIP_MDC = 2, | |||
CHIP_LHISI = 3, | |||
CHIP_DC = 4, | |||
CHIP_CLOUD_V2 = 5, | |||
CHIP_NO_DEVICE = 6, | |||
CHIP_END = 7, | |||
} rtChipType_t; | |||
typedef enum tagRtAicpuScheType { | |||
@@ -59,29 +59,32 @@ typedef enum tagRtDeviceCapabilityType { | |||
RT_SCHEDULE_HARDWARE, // HWTS Schedule | |||
RT_AICPU_BLOCKING_OP_NOT_SUPPORT, | |||
RT_AICPU_BLOCKING_OP_SUPPORT, // 1910/1980/1951 ts support AICPU blocking operation | |||
RT_MODE_NO_FFTS, // no ffts | |||
RT_MODE_FFTS, // 1981 get ffts work mode, ffts | |||
RT_MODE_FFTS_PLUS, // 1981 get ffts work mode, ffts plus | |||
} rtDeviceCapabilityType; | |||
typedef enum tagRtVersion { | |||
VER_BEGIN = 0, | |||
VER_NA = VER_BEGIN, | |||
VER_ES, | |||
VER_CS, | |||
VER_SD3403, | |||
VER_END, | |||
VER_ES = 1, | |||
VER_CS = 2, | |||
VER_SD3403 = 3, | |||
VER_END = 4, | |||
} rtVersion_t; | |||
/* match rtChipType_t */ | |||
typedef enum tagRtPlatformType { | |||
PLATFORM_BEGIN = 0, | |||
PLATFORM_MINI_V1 = PLATFORM_BEGIN, | |||
PLATFORM_CLOUD_V1, | |||
PLATFORM_MINI_V2, | |||
PLATFORM_LHISI_ES, | |||
PLATFORM_LHISI_CS, | |||
PLATFORM_DC, | |||
PLATFORM_CLOUD_V2, | |||
PLATFORM_LHISI_SD3403, | |||
PLATFORM_END, | |||
PLATFORM_CLOUD_V1 = 1, | |||
PLATFORM_MINI_V2 = 2, | |||
PLATFORM_LHISI_ES = 3, | |||
PLATFORM_LHISI_CS = 4, | |||
PLATFORM_DC = 5, | |||
PLATFORM_CLOUD_V2 = 6, | |||
PLATFORM_LHISI_SD3403 = 7, | |||
PLATFORM_END = 8, | |||
} rtPlatformType_t; | |||
typedef enum tagRtCubeFracMKNFp16 { | |||
@@ -240,4 +243,4 @@ RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout); | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_STREAM_H__ | |||
#endif // CCE_RUNTIME_CONFIG_H |
@@ -14,8 +14,8 @@ | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_CONTEXT_H__ | |||
#define __CCE_RUNTIME_CONTEXT_H__ | |||
#ifndef CCE_RUNTIME_CONTEXT_H | |||
#define CCE_RUNTIME_CONTEXT_H | |||
#include "base.h" | |||
@@ -173,4 +173,4 @@ RTS_API rtError_t rtSetCtxINFMode(bool mode); | |||
#endif | |||
#endif // __CCE_RUNTIME_CONTEXT_H__ | |||
#endif // CCE_RUNTIME_CONTEXT_H |
@@ -14,8 +14,8 @@ | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_DEVICE_H__ | |||
#define __CCE_RUNTIME_DEVICE_H__ | |||
#ifndef CCE_RUNTIME_DEVICE_H | |||
#define CCE_RUNTIME_DEVICE_H | |||
#include "base.h" | |||
@@ -23,8 +23,8 @@ | |||
extern "C" { | |||
#endif | |||
#define RT_CAPABILITY_SUPPORT (0x1) | |||
#define RT_CAPABILITY_NOT_SUPPORT (0x0) | |||
#define RT_CAPABILITY_SUPPORT (0x1U) | |||
#define RT_CAPABILITY_NOT_SUPPORT (0x0U) | |||
typedef struct tagRTDeviceInfo { | |||
uint8_t env_type; // 0: FPGA 1: EMU 2: ESL | |||
@@ -45,27 +45,28 @@ typedef struct tagRTDeviceInfo { | |||
typedef enum tagRtRunMode { | |||
RT_RUN_MODE_OFFLINE = 0, | |||
RT_RUN_MODE_ONLINE = 1, | |||
RT_RUN_MODE_AICPU_SCHED = 2, | |||
RT_RUN_MODE_ONLINE, | |||
RT_RUN_MODE_AICPU_SCHED, | |||
RT_RUN_MODE_RESERVED | |||
} rtRunMode; | |||
typedef enum tagRtAicpuDeployType { | |||
AICPU_DEPLOY_CROSS_OS = 0x0, | |||
AICPU_DEPLOY_CROSS_PROCESS = 0x1, | |||
AICPU_DEPLOY_CROSS_THREAD = 0x2, | |||
AICPU_DEPLOY_CROSS_PROCESS, | |||
AICPU_DEPLOY_CROSS_THREAD, | |||
AICPU_DEPLOY_RESERVED | |||
} rtAicpuDeployType_t; | |||
typedef enum tagRtFeatureType { | |||
FEATURE_TYPE_MEMCPY = 0, | |||
FEATURE_TYPE_MEMORY = 1, | |||
FEATURE_TYPE_MEMORY, | |||
FEATURE_TYPE_RSV | |||
} rtFeatureType_t; | |||
typedef enum tagRtDeviceFeatureType { | |||
FEATURE_TYPE_SCHE, | |||
FEATURE_TYPE_BLOCKING_OPERATOR, | |||
FEATURE_TYPE_FFTS_MODE, | |||
FEATURE_TYPE_END, | |||
} rtDeviceFeatureType_t; | |||
@@ -90,6 +91,15 @@ typedef enum tagRtDeviceModuleType { | |||
RT_MODULE_TYPE_VECTOR_CORE, /**< VECTOR CORE info*/ | |||
} rtDeviceModuleType_t; | |||
// used for rtGetDevMsg callback function | |||
typedef void (*rtGetMsgCallback)(const char *msg, uint32_t len); | |||
typedef enum tagGetDevMsgType { | |||
RT_GET_DEV_ERROR_MSG = 0, | |||
RT_GET_DEV_RUNNING_STREAM_SNAPSHOT_MSG, | |||
RT_GET_DEV_MSG_RESERVE | |||
} rtGetDevMsgType_t; | |||
/** | |||
* @ingroup dvrt_dev | |||
* @brief get total device number. | |||
@@ -408,8 +418,17 @@ RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device); | |||
*/ | |||
RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device); | |||
/** | |||
* @ingroup dvrt_dev | |||
* @brief get device message | |||
* @param [in] rtGetDevMsgType_t getMsgType: msg type | |||
* @param [in] rtGetMsgCallback callback: acl callback function | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtGetDevMsg(rtGetDevMsgType_t getMsgType, rtGetMsgCallback callback); | |||
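A minimal sketch of using the new rtGetDevMsg interface with the callback type declared above (error handling trimmed; the include path is an assumption):

```cpp
#include <cstdio>
#include "runtime/dev.h"  // assumed include path for this header

// Matches rtGetMsgCallback: receives the device message and its length.
static void PrintDevMsg(const char *msg, uint32_t len) {
  (void)fprintf(stdout, "device msg (%u bytes): %.*s\n", len, static_cast<int>(len), msg);
}

void DumpDeviceErrorMsg() {
  const rtError_t ret = rtGetDevMsg(RT_GET_DEV_ERROR_MSG, PrintDevMsg);
  if (ret != RT_ERROR_NONE) {
    (void)fprintf(stderr, "rtGetDevMsg failed: %d\n", static_cast<int>(ret));
  }
}
```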
#if defined(__cplusplus) | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_DEVICE_H__ | |||
#endif // CCE_RUNTIME_DEVICE_H |
@@ -14,8 +14,8 @@ | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_DVFSPROFILE_H__ | |||
#define __CCE_RUNTIME_DVFSPROFILE_H__ | |||
#ifndef CCE_RUNTIME_DVFSPROFILE_H | |||
#define CCE_RUNTIME_DVFSPROFILE_H | |||
#include "base.h" | |||
@@ -60,4 +60,4 @@ RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode); | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_PROFILE_H__ | |||
#endif // CCE_RUNTIME_DVFSPROFILE_H |
@@ -14,8 +14,8 @@ | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_EVENT_H__ | |||
#define __CCE_RUNTIME_EVENT_H__ | |||
#ifndef CCE_RUNTIME_EVENT_H | |||
#define CCE_RUNTIME_EVENT_H | |||
#include "base.h" | |||
@@ -33,8 +33,8 @@ typedef enum rtEventWaitStatus { | |||
* @ingroup event_flags | |||
* @brief event op bit flags | |||
*/ | |||
#define RT_EVENT_DEFAULT (0x0E) | |||
#define RT_EVENT_WITH_FLAG (0x0B) | |||
#define RT_EVENT_DEFAULT (0x0EU) | |||
#define RT_EVENT_WITH_FLAG (0x0BU) | |||
#define RT_EVENT_DDSYNC_NS 0x01U | |||
#define RT_EVENT_STREAM_MARK 0x02U | |||
@@ -200,14 +200,14 @@ RTS_API rtError_t rtNotifyWait(rtNotify_t notify, rtStream_t stream); | |||
/** | |||
* @ingroup dvrt_event | |||
* @brief Wait for a notify with time out | |||
* @param [in] notify_ notify to be wait | |||
* @param [in] stream_ input stream | |||
* @param [in] notify notify to be wait | |||
* @param [in] stream input stream | |||
* @param [in] timeOut input timeOut | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
* @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx | |||
*/ | |||
RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify_, rtStream_t stream_, uint32_t timeOut); | |||
RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify, rtStream_t stream, uint32_t timeOut); | |||
/** | |||
* @ingroup dvrt_event | |||
@@ -270,10 +270,10 @@ RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffs | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
* @return RT_ERROR_DRV_ERR for driver error | |||
*/ | |||
RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num); | |||
RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int32_t num); | |||
#if defined(__cplusplus) | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_EVENT_H__ | |||
#endif // CCE_RUNTIME_EVENT_H |
@@ -14,8 +14,8 @@ | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_KERNEL_H__ | |||
#define __CCE_RUNTIME_KERNEL_H__ | |||
#ifndef CCE_RUNTIME_KERNEL_H | |||
#define CCE_RUNTIME_KERNEL_H | |||
#include "base.h" | |||
#include "stream.h" | |||
@@ -131,7 +131,10 @@ typedef struct tagRtArgsWithTiling { | |||
uint32_t argsSizeWithoutTiling; // input + output + tiling addr size | |||
uint16_t tilingAddrOffset; // tiling addr offset | |||
uint16_t tilingDataOffset; // tiling data offset | |||
uint16_t reserved[2]; | |||
uint16_t hostInputAddrOffset; // index of host_memory input in inputs_addrs list | |||
uint16_t hostInputDataOffset; // host_mem input data offset | |||
bool hasHostMemInput; // has host_memory input data in args or not: true or false | |||
uint8_t reserved[7]; | |||
} rtArgsWithTiling_t; | |||
/** | |||
@@ -141,7 +144,7 @@ typedef struct tagRtArgsWithTiling { | |||
typedef enum tagRtDumpKind { | |||
RT_DATA_DUMP_KIND_INVALID = -1, | |||
RT_DATA_DUMP_KIND_DUMP = 0, | |||
RT_DATA_DUMP_KIND_RESERVED | |||
RT_DATA_DUMP_KIND_RESERVED = 1, | |||
} rtDumpKind_t; | |||
/** | |||
@@ -160,72 +163,72 @@ typedef void (*rtCallback_t)(void *fnData); | |||
* @ingroup rt_kernel | |||
* @brief magic number of plain binary for aicore | |||
*/ | |||
#define RT_DEV_BINARY_MAGIC_PLAIN 0xabceed50 | |||
#define RT_DEV_BINARY_MAGIC_PLAIN 0xabceed50U | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief magic number of plain binary for aicpu | |||
*/ | |||
#define RT_DEV_BINARY_MAGIC_PLAIN_AICPU 0xabceed51 | |||
#define RT_DEV_BINARY_MAGIC_PLAIN_AICPU 0xabceed51U | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief magic number of plain binary for aivector | |||
*/ | |||
#define RT_DEV_BINARY_MAGIC_PLAIN_AIVEC 0xabceed52 | |||
#define RT_DEV_BINARY_MAGIC_PLAIN_AIVEC 0xabceed52U | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief magic number of elf binary for aicore | |||
*/ | |||
#define RT_DEV_BINARY_MAGIC_ELF 0x43554245 | |||
#define RT_DEV_BINARY_MAGIC_ELF 0x43554245U | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief magic number of elf binary for aicpu | |||
*/ | |||
#define RT_DEV_BINARY_MAGIC_ELF_AICPU 0x41415243 | |||
#define RT_DEV_BINARY_MAGIC_ELF_AICPU 0x41415243U | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief magic number of elf binary for aivector | |||
*/ | |||
#define RT_DEV_BINARY_MAGIC_ELF_AIVEC 0x41415246 | |||
#define RT_DEV_BINARY_MAGIC_ELF_AIVEC 0x41415246U | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief magic number of elf binary for aicube | |||
*/ | |||
#define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41494343 | |||
#define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41494343U | |||
/** | |||
* @ingroup rt_kernel_flags | |||
* @brief kernel op bit flags | |||
*/ | |||
#define RT_KERNEL_DEFAULT (0x00) | |||
#define RT_KERNEL_CONVERT (0x01) | |||
#define RT_KERNEL_DUMPFLAG (0x02) | |||
#define RT_FUSION_KERNEL_DUMPFLAG (0x04) | |||
#define RT_KERNEL_CUSTOM_AICPU (0x08) | |||
#define RT_KERNEL_DEFAULT (0x00U) | |||
#define RT_KERNEL_CONVERT (0x01U) | |||
#define RT_KERNEL_DUMPFLAG (0x02U) | |||
#define RT_FUSION_KERNEL_DUMPFLAG (0x04U) | |||
#define RT_KERNEL_CUSTOM_AICPU (0x08U) | |||
// STARS topic scheduler sqe : topic_type | |||
#define RT_KERNEL_DEVICE_FIRST (0x10) | |||
#define RT_KERNEL_HOST_ONLY (0x20) | |||
#define RT_KERNEL_HOST_FIRST (0x40) | |||
#define RT_KERNEL_DEVICE_FIRST (0x10U) | |||
#define RT_KERNEL_HOST_ONLY (0x20U) | |||
#define RT_KERNEL_HOST_FIRST (0x40U) | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief kernel mode | |||
**/ | |||
#define RT_DEFAULT_KERNEL_MODE (0x00) | |||
#define RT_NORMAL_KERNEL_MODE (0x01) | |||
#define RT_ALL_KERNEL_MODE (0x02) | |||
#define RT_DEFAULT_KERNEL_MODE (0x00U) | |||
#define RT_NORMAL_KERNEL_MODE (0x01U) | |||
#define RT_ALL_KERNEL_MODE (0x02U) | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief kernel L1 Fusion Dump bit flags | |||
*/ | |||
#define RT_DDR_ADDR (0x0) | |||
#define RT_DDR_ADDR (0x0U) | |||
/** | |||
* @ingroup rt_kernel | |||
@@ -672,7 +675,7 @@ RTS_API rtError_t rtStopMDCProfiler(void *addr); | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockDim, | |||
rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream_); | |||
rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream); | |||
/** | |||
* @ingroup rt_kernel | |||
@@ -688,11 +691,11 @@ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockD | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtKernelLaunchWithHandleAndTiling(void *handle, const void *devFunc, uint32_t blockDim, | |||
rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream_, const void* kernelInfo); | |||
rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream, const void* kernelInfo); | |||
#if defined(__cplusplus) | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_KERNEL_H__ | |||
#endif // CCE_RUNTIME_KERNEL_H | |||
@@ -14,12 +14,10 @@ | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_MEM_H__ | |||
#define __CCE_RUNTIME_MEM_H__ | |||
#ifndef CCE_RUNTIME_MEM_H | |||
#define CCE_RUNTIME_MEM_H | |||
/*lint -e7*/ | |||
#include <stddef.h> | |||
/*lint +e7*/ | |||
#include "base.h" | |||
#include "config.h" | |||
#include "stream.h" | |||
@@ -32,43 +30,43 @@ extern "C" { | |||
* @ingroup dvrt_mem | |||
* @brief memory type | |||
*/ | |||
#define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device | |||
#define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device | |||
#define RT_MEMORY_RDMA_HBM ((uint32_t)0x3) // RDMA-HBM memory on device | |||
#define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device | |||
#define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device | |||
#define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device | |||
#define RT_MEMORY_P2P_DDR ((uint32_t)0x11) // DDR memory on other device | |||
#define RT_MEMORY_DDR_NC ((uint32_t)0x20) // DDR memory of non-cache | |||
#define RT_MEMORY_TS_4G ((uint32_t)0x40) | |||
#define RT_MEMORY_TS ((uint32_t)0x80) | |||
#define RT_MEMORY_RESERVED ((uint32_t)0x100) | |||
#define RT_MEMORY_DEFAULT (0x0U) // default memory on device | |||
#define RT_MEMORY_HBM (0x2U) // HBM memory on device | |||
#define RT_MEMORY_RDMA_HBM (0x3U) // RDMA-HBM memory on device | |||
#define RT_MEMORY_DDR (0x4U) // DDR memory on device | |||
#define RT_MEMORY_SPM (0x8U) // shared physical memory on device | |||
#define RT_MEMORY_P2P_HBM (0x10U) // HBM memory on other 4P device | |||
#define RT_MEMORY_P2P_DDR (0x11U) // DDR memory on other device | |||
#define RT_MEMORY_DDR_NC (0x20U) // DDR memory of non-cache | |||
#define RT_MEMORY_TS_4G (0x40U) | |||
#define RT_MEMORY_TS (0x80U) | |||
#define RT_MEMORY_RESERVED (0x100U) | |||
#define RT_MEMORY_L1 ((uint32_t)0x1<<16) | |||
#define RT_MEMORY_L2 ((uint32_t)0x1<<17) | |||
#define RT_MEMORY_L1 (0x1U << 16U) | |||
#define RT_MEMORY_L2 (0x1U << 17U) | |||
/** | |||
* @ingroup dvrt_mem | |||
* @brief memory info type | |||
*/ | |||
#define RT_MEM_INFO_TYPE_DDR_SIZE ((uint32_t)0x1) | |||
#define RT_MEM_INFO_TYPE_HBM_SIZE ((uint32_t)0x2) | |||
#define RT_MEM_INFO_TYPE_DDR_P2P_SIZE ((uint32_t)0x3) | |||
#define RT_MEM_INFO_TYPE_HBM_P2P_SIZE ((uint32_t)0x4) | |||
#define RT_MEM_INFO_TYPE_DDR_SIZE (0x1U) | |||
#define RT_MEM_INFO_TYPE_HBM_SIZE (0x2U) | |||
#define RT_MEM_INFO_TYPE_DDR_P2P_SIZE (0x3U) | |||
#define RT_MEM_INFO_TYPE_HBM_P2P_SIZE (0x4U) | |||
/** | |||
* @ingroup dvrt_mem | |||
* @brief memory Policy | |||
*/ | |||
#define RT_MEMORY_POLICY_NONE ((uint32_t)0x0) // Malloc mem prior hage page, then default page | |||
#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST ((uint32_t)0x1 << 10) // Malloc mem prior hage page, then default page | |||
#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY ((uint32_t)0x1 << 11) // Malloc mem only use hage page | |||
#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY ((uint32_t)0x1 << 12) // Malloc mem only use default page | |||
#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P ((uint32_t)0x1 << 13) // Malloc mem prior hage page, then default page, use for p2p | |||
#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P ((uint32_t)0x1 << 14) // Malloc mem only use hage page, use for p2p | |||
#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P ((uint32_t)0x1 << 15) // Malloc mem only use default page, use for p2p | |||
#define RT_MEMORY_POLICY_NONE (0x0U) // Malloc mem prior huge page, then default page | |||
#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST (0x1U << 10U) // Malloc mem prior huge page, then default page | |||
#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY (0x1U << 11U) // Malloc mem only use huge page | |||
#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY (0x1U << 12U) // Malloc mem only use default page | |||
#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P (0x1U << 13U) // Malloc mem prior huge page, then default page, for p2p | |||
#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P (0x1U << 14U) // Malloc mem only use huge page, use for p2p | |||
#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P (0x1U << 15U) // Malloc mem only use default page, use for p2p | |||
#define MEM_ALLOC_TYPE_BIT ((uint32_t)0x3FF) // mem type bit in <0, 9> | |||
#define MEM_ALLOC_TYPE_BIT (0x3FFU) // mem type bit in <0, 9> | |||
/** | |||
* @ingroup dvrt_mem | |||
@@ -80,10 +78,10 @@ typedef uint32_t rtMemType_t; | |||
* @ingroup dvrt_mem | |||
* @brief memory advise type | |||
*/ | |||
#define RT_MEMORY_ADVISE_EXE (0x02) | |||
#define RT_MEMORY_ADVISE_THP (0x04) | |||
#define RT_MEMORY_ADVISE_PLE (0x08) | |||
#define RT_MEMORY_ADVISE_PIN (0x16) | |||
#define RT_MEMORY_ADVISE_EXE (0x02U) | |||
#define RT_MEMORY_ADVISE_THP (0x04U) | |||
#define RT_MEMORY_ADVISE_PLE (0x08U) | |||
#define RT_MEMORY_ADVISE_PIN (0x16U) | |||
/** | |||
* @ingroup dvrt_mem | |||
@@ -119,7 +117,7 @@ typedef enum tagRtRecudeKind { | |||
RT_MEMCPY_SDMA_AUTOMATIC_MAX = 11, | |||
RT_MEMCPY_SDMA_AUTOMATIC_MIN = 12, | |||
RT_MEMCPY_SDMA_AUTOMATIC_EQUAL = 13, | |||
RT_RECUDE_KIND_END | |||
RT_RECUDE_KIND_END = 14, | |||
} rtRecudeKind_t; | |||
typedef enum tagRtDataType { | |||
@@ -134,7 +132,7 @@ typedef enum tagRtDataType { | |||
RT_DATA_TYPE_UINT8 = 8, // uint8 | |||
RT_DATA_TYPE_UINT16= 9, // uint16 | |||
RT_DATA_TYPE_UINT32= 10,// uint32 | |||
RT_DATA_TYPE_END | |||
RT_DATA_TYPE_END = 11, | |||
} rtDataType_t; | |||
/** | |||
@@ -197,7 +195,7 @@ typedef struct rtMallocHostSharedMemoryIn { | |||
} rtMallocHostSharedMemoryIn; | |||
typedef struct rtMallocHostSharedMemoryOut { | |||
int fd; | |||
int32_t fd; | |||
void *ptr; | |||
void *devPtr; | |||
} rtMallocHostSharedMemoryOut; | |||
@@ -205,7 +203,7 @@ typedef struct rtMallocHostSharedMemoryOut { | |||
typedef struct rtFreeHostSharedMemoryIn { | |||
const char *name; | |||
const uint64_t size; | |||
int fd; | |||
int32_t fd; | |||
void *ptr; | |||
void *devPtr; | |||
} rtFreeHostSharedMemoryIn; | |||
@@ -384,6 +382,39 @@ RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, ui | |||
/** | |||
* @ingroup dvrt_mem | |||
* @brief synchronized memcpy2D | |||
* @param [in] dst destination address pointer | |||
* @param [in] dstPitch pitch of destination memory | |||
* @param [in] src source address pointer | |||
* @param [in] srcPitch pitch of source memory | |||
* @param [in] width width of matrix transfer | |||
* @param [in] height height of matrix transfer | |||
* @param [in] kind memcpy type | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtMemcpy2d(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width, | |||
uint64_t height, rtMemcpyKind_t kind); | |||
/** | |||
* @ingroup dvrt_mem | |||
* @brief asynchronized memcpy2D | |||
* @param [in] dst destination address pointer | |||
* @param [in] dstPitch length of destination address memory | |||
* @param [in] src source address pointer | |||
* @param [in] srcPitch length of destination address memory | |||
* @param [in] width width of matrix transfer | |||
* @param [in] height height of matrix transfer | |||
* @param [in] kind memcpy type | |||
* @param [in] stream asynchronized task stream | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtMemcpy2dAsync(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width, | |||
uint64_t height, rtMemcpyKind_t kind, rtStream_t stream); | |||
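A usage sketch for the synchronous 2-D copy declared above, copying a tightly packed window out of a pitched host buffer. Only the rtMemcpy2d signature is taken from this header; RT_MEMCPY_HOST_TO_HOST is assumed to be a member of rtMemcpyKind_t and the include path is an assumption:

```cpp
#include <cstdint>
#include <vector>
#include "runtime/mem.h"  // assumed include path for this header

bool CopyWindowOnHost() {
  std::vector<uint8_t> src(16U * 4U, 0xABU);  // 4 rows with a 16-byte pitch
  std::vector<uint8_t> dst(8U * 4U, 0x00U);   // 4 rows, tightly packed 8-byte pitch
  // Copy an 8-byte-wide, 4-row window; pitches are the byte strides between rows.
  const rtError_t ret = rtMemcpy2d(dst.data(), 8U, src.data(), 16U,
                                   8U, 4U, RT_MEMCPY_HOST_TO_HOST);
  return ret == RT_ERROR_NONE;
}
```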
/** | |||
* @ingroup dvrt_mem | |||
* @brief query memory size | |||
* @param [in] aiCoreMemorySize | |||
* @return RT_ERROR_NONE for ok, errno for failed | |||
@@ -429,22 +460,22 @@ RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t value, uin | |||
/** | |||
* @ingroup dvrt_mem | |||
* @brief get current device memory total and free | |||
* @param [out] free | |||
* @param [out] total | |||
* @param [out] freeSize | |||
* @param [out] totalSize | |||
* @return RT_ERROR_NONE for ok, errno for failed | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtMemGetInfo(size_t *free, size_t *total); | |||
RTS_API rtError_t rtMemGetInfo(size_t *freeSize, size_t *totalSize); | |||
/** | |||
* @ingroup dvrt_mem | |||
* @brief get current device memory total and free | |||
* @param [in] memInfoType | |||
* @param [out] free | |||
* @param [out] total | |||
* @param [out] freeSize | |||
* @param [out] totalSize | |||
* @return RT_ERROR_NONE for ok, errno for failed | |||
*/ | |||
RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *free, size_t *total); | |||
RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *freeSize, size_t *totalSize); | |||
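A small sketch of the renamed memory-query parameters above (the info-type constant is defined earlier in this header; the include path is an assumption):

```cpp
#include <cstdio>
#include "runtime/mem.h"  // assumed include path for this header

void ReportDeviceMemory() {
  size_t freeSize = 0U;
  size_t totalSize = 0U;
  if (rtMemGetInfo(&freeSize, &totalSize) == RT_ERROR_NONE) {
    (void)printf("device memory: free=%zu total=%zu\n", freeSize, totalSize);
  }
  if (rtMemGetInfoEx(RT_MEM_INFO_TYPE_HBM_SIZE, &freeSize, &totalSize) == RT_ERROR_NONE) {
    (void)printf("HBM memory:    free=%zu total=%zu\n", freeSize, totalSize);
  }
}
```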
/** | |||
* @ingroup dvrt_mem | |||
@@ -551,4 +582,4 @@ RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t str | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_MEM_H__ | |||
#endif // CCE_RUNTIME_MEM_H |
@@ -14,8 +14,8 @@ | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_RT_H__ | |||
#define __CCE_RUNTIME_RT_H__ | |||
#ifndef CCE_RUNTIME_RT_H | |||
#define CCE_RUNTIME_RT_H | |||
#include "base.h" | |||
#include "config.h" | |||
@@ -32,4 +32,4 @@ | |||
#include "rt_ffts_plus.h" | |||
#include "rt_dfx.h" | |||
#endif // __CCE_RUNTIME_RT_H__ | |||
#endif // CCE_RUNTIME_RT_H |
@@ -3,8 +3,8 @@ | |||
* Description: ffts interface | |||
*/ | |||
#ifndef __CCE_RUNTIME_FFTS_H | |||
#define __CCE_RUNTIME_FFTS_H | |||
#ifndef CCE_RUNTIME_RT_FFTS_H | |||
#define CCE_RUNTIME_RT_FFTS_H | |||
#include "base.h" | |||
@@ -33,7 +33,7 @@ typedef enum tagFftsSubTaskType { | |||
RT_FFTS_SUB_TASK_TYPE_MIX_AIC = 6, | |||
RT_FFTS_SUB_TASK_TYPE_MIX_AIV = 7, | |||
RT_FFTS_SUB_TASK_TYPE_SDMA = 8, | |||
RT_FFTS_SUB_TASK_TYPE_RESERVED, | |||
RT_FFTS_SUB_TASK_TYPE_RESERVED = 9, | |||
} rtFftsSubTaskType_t; | |||
typedef struct tagManualThreadDmuInfo { | |||
@@ -178,7 +178,9 @@ typedef struct tagFftsTaskInfo { | |||
RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream); | |||
RTS_API rtError_t rtFftsTaskLaunchWithFlag(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream, uint32_t flag); | |||
#if defined(__cplusplus) | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_FFTS_H | |||
#endif // CCE_RUNTIME_RT_FFTS_H |
@@ -3,8 +3,8 @@ | |||
* Description: ffts plus interface | |||
*/ | |||
#ifndef __CCE_RUNTIME_FFTS_PLUS_H | |||
#define __CCE_RUNTIME_FFTS_PLUS_H | |||
#ifndef CCE_RUNTIME_RT_FFTS_PLUS_H | |||
#define CCE_RUNTIME_RT_FFTS_PLUS_H | |||
#include "base.h" | |||
#include "rt_ffts_plus_define.h" | |||
@@ -26,9 +26,13 @@ typedef struct tagFftsPlusTaskInfo { | |||
#pragma pack(pop) | |||
RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *handle, const void *devFunc, void **addr, uint32_t *prefetchCnt); | |||
RTS_API rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream); | |||
RTS_API rtError_t rtFftsPlusTaskLaunchWithFlag(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream, | |||
uint32_t flag); | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_FFTS_H | |||
#endif // CCE_RUNTIME_RT_FFTS_PLUS_H |
@@ -3,8 +3,8 @@ | |||
* Description: the definition of ffts plus | |||
*/ | |||
#ifndef __CCE_RUNTIME_FFTS_PLUS_DEFINE_H | |||
#define __CCE_RUNTIME_FFTS_PLUS_DEFINE_H | |||
#ifndef CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H | |||
#define CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H | |||
#include "base.h" | |||
@@ -30,7 +30,7 @@ typedef enum tagFftsPlusHwType { | |||
RT_HW_CTX_TYPE_WRITEBACK_DATA = 11, | |||
RT_HW_CTX_TYPE_AICPU = 12, | |||
RT_HW_CTX_TYPE_LOAD = 13, | |||
RT_HW_CTX_TYPE_MAX, | |||
RT_HW_CTX_TYPE_MAX = 14, | |||
} rtFftsPlusHwType_t; | |||
// hardware context type | |||
@@ -40,7 +40,7 @@ typedef enum tagFftsPlusSoftType { | |||
RT_SOFT_CTX_TYPE_AT_START = 3, | |||
RT_SOFT_CTX_TYPE_AT_END = 4, | |||
RT_SOFT_CTX_TYPE_LABEL = 5, | |||
RT_SOFT_CTX_TYPE_MAX, | |||
RT_SOFT_CTX_TYPE_MAX = 6, | |||
} rtFftsPlusSoftType_t; | |||
typedef enum tagFftsPlusContextType { | |||
@@ -71,7 +71,7 @@ typedef enum tagFftsPlusCondType { | |||
RT_COND_TYPE_GREATER_OR_EQUAL = 3, | |||
RT_COND_TYPE_LESS = 4, | |||
RT_COND_TYPE_LESS_OR_EQUAL = 5, | |||
RT_COND_TYPE_MAX, | |||
RT_COND_TYPE_MAX = 6, | |||
} rtFftsPlusCondType_t; | |||
// the definition of ffts plus context | |||
@@ -505,7 +505,7 @@ typedef struct tagFftsPlusAtStartCtx { | |||
uint16_t threadIdInit; | |||
uint16_t threadWindowSize; | |||
// 80-127 | |||
uint16_t res9[12]; | |||
uint32_t res9[12]; | |||
} rtFftsPlusAtStartCtx_t; | |||
// at end context | |||
@@ -712,4 +712,4 @@ typedef struct tagFftsPlusCondSwitchCtx { | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_FFTS_PLUS_DEFINE_H | |||
#endif // CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H |
@@ -0,0 +1,416 @@ | |||
/* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||
* Description: mbuf and queue interface | |||
*/ | |||
#ifndef CCE_RUNTIME_RT_MEM_QUEUE_H | |||
#define CCE_RUNTIME_RT_MEM_QUEUE_H | |||
#include "base.h" | |||
#if defined(__cplusplus) | |||
extern "C" { | |||
#endif | |||
#define RT_MQ_MAX_NAME_LEN 128 // same as driver's | |||
#define RT_MQ_DEPTH_MIN 2U | |||
#define RT_MQ_MODE_PUSH 1 | |||
#define RT_MQ_MODE_PULL 2 | |||
#define RT_MQ_MODE_DEFAULT RT_MQ_MODE_PUSH | |||
typedef struct tagMemQueueAttr { | |||
char name[RT_MQ_MAX_NAME_LEN]; | |||
uint32_t depth; | |||
uint32_t workMode; | |||
uint32_t flowCtrlDropTime; | |||
bool flowCtrlFlag; | |||
bool overWriteFlag; | |||
} rtMemQueueAttr_t; | |||
typedef struct tagMemQueueShareAttr { | |||
uint32_t manage : 1; | |||
uint32_t read : 1; | |||
uint32_t write : 1; | |||
uint32_t rsv : 29; | |||
} rtMemQueueShareAttr_t; | |||
typedef struct tagMemQueueBuffInfo { | |||
void *addr; | |||
size_t len; | |||
} rtMemQueueBuffInfo; | |||
typedef struct tagMemQueueBuff { | |||
void *contextAddr; | |||
size_t contextLen; | |||
rtMemQueueBuffInfo *buffInfo; | |||
uint32_t buffCount; | |||
} rtMemQueueBuff_t; | |||
typedef enum tagMemQueueQueryCmd { | |||
RT_MQ_QUERY_QUE_ATTR_OF_CUR_PROC = 0, // input is qid(4bytes), output is rtMemQueueShareAttr_t | |||
RT_MQ_QUERY_QUES_OF_CUR_PROC = 1, | |||
RT_MQ_QUERY_CMD_MAX = 2 | |||
} rtMemQueueQueryCmd_t; | |||
#define RT_MQ_EVENT_QS_MSG 27 // same as driver's | |||
#define RT_MQ_SCHED_PRIORITY_LEVEL0 0 // same as driver's | |||
#define RT_MQ_SCHED_PRIORITY_LEVEL1 1 | |||
#define RT_MQ_SCHED_PRIORITY_LEVEL2 2 | |||
#define RT_MQ_SCHED_PRIORITY_LEVEL3 3 | |||
#define RT_MQ_SCHED_PRIORITY_LEVEL4 4 | |||
#define RT_MQ_SCHED_PRIORITY_LEVEL5 5 | |||
#define RT_MQ_SCHED_PRIORITY_LEVEL6 6 | |||
#define RT_MQ_SCHED_PRIORITY_LEVEL7 7 | |||
/* Events can be released between different systems. This parameter specifies the destination type of events | |||
to be released. The destination type is defined based on the CPU type of the destination system. */ | |||
#define RT_MQ_DST_ENGINE_ACPU_DEVICE 0 // device AICPU, same as driver's | |||
#define RT_MQ_DST_ENGINE_ACPU_HOST 1 // Host AICPU | |||
#define RT_MQ_DST_ENGINE_CCPU_DEVICE 2 // device CtrlCPU | |||
#define RT_MQ_DST_ENGINE_CCPU_HOST 3 // Host CtrlCPU | |||
#define RT_MQ_DST_ENGINE_DCPU_DEVICE 4 // device DataCPU | |||
#define RT_MQ_DST_ENGINE_TS_CPU 5 // device TS CPU | |||
#define RT_MQ_DST_ENGINE_DVPP_CPU 6 // device DVPP CPU | |||
#define RT_MQ_SCHED_EVENT_QS_MSG 25 // same as driver's EVENT_QS_MSG | |||
/* When the destination engine is AICPU, select a policy. | |||
ONLY: The command is executed only on the local AICPU. | |||
FIRST: The local AICPU is preferentially executed. If the local AICPU is busy, the remote AICPU can be used. */ | |||
#define RT_SCHEDULE_POLICY_ONLY 0 // same as driver's schedule_policy | |||
#define RT_SCHEDULE_POLICY_FIRST 1 // same as driver's schedule_policy | |||
typedef struct tagEschedEventSummary { | |||
int32_t pid; // dst PID | |||
uint32_t grpId; | |||
int32_t eventId; // only RT_MQ_SCHED_EVENT_QS_MSG is supported | |||
uint32_t subeventId; | |||
uint32_t msgLen; | |||
char *msg; | |||
uint32_t dstEngine; // dst system cpu type | |||
int32_t policy; // RT_SCHEDULE_POLICY_ONLY or RT_SCHEDULE_POLICY_FIRST | |||
} rtEschedEventSummary_t; | |||
typedef struct tagEschedEventReply { | |||
char *buf; | |||
uint32_t bufLen; | |||
uint32_t replyLen; // output, ack msg len, same as msgLen in halEschedAckEvent | |||
} rtEschedEventReply_t; | |||
#define RT_DEV_PROCESS_CP1 0 | |||
#define RT_DEV_PROCESS_CP2 1 | |||
#define RT_DEV_PROCESS_DEV_ONLY 2 | |||
#define RT_DEV_PROCESS_QS 3 | |||
#define RT_DEV_PROCESS_SIGN_LENGTH 49 | |||
typedef struct tagBindHostpidInfo { | |||
int32_t hostPid; | |||
uint32_t vfid; | |||
uint32_t chipId; | |||
int32_t mode; // online:0, offline:1 | |||
int32_t cpType; // type of custom-process, see RT_DEV_PROCESS_XXX | |||
uint32_t len; // length of sign | |||
char sign[RT_DEV_PROCESS_SIGN_LENGTH]; // sign of hostpid | |||
} rtBindHostpidInfo_t; | |||
#define RT_MEM_BUFF_MAX_CFG_NUM 64 | |||
typedef struct { | |||
uint32_t cfgId; // cfg id, start from 0 | |||
uint32_t totalSize; // one zone total size | |||
uint32_t blkSize; // blk size, 2^n (0, 2M] | |||
uint32_t maxBufSize; // max size can alloc from zone | |||
uint32_t pageType; // page type, small page / huge page | |||
int32_t elasticEnable; // elastic enable | |||
int32_t elasticRate; | |||
int32_t elasticRateMax; | |||
int32_t elasticHighLevel; | |||
int32_t elasticLowLevel; | |||
} rtMemZoneCfg_t; | |||
typedef struct { | |||
rtMemZoneCfg_t cfg[RT_MEM_BUFF_MAX_CFG_NUM]; | |||
} rtMemBuffCfg_t; | |||
typedef void *rtMbufPtr_t; | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief init queue schedule | |||
* @param [in] device the logical device id | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueInitQS(int32_t device); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief create mbuf queue | |||
* @param [in] device the logical device id | |||
* @param [in] queueAttr attribute of queue | |||
* @param [out] qid queue id | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueCreate(int32_t device, const rtMemQueueAttr_t *queueAttr, uint32_t *qid); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief destroy mbuf queue | |||
* @param [in] device the logical device id | |||
* @param [in] qid queue id | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueDestroy(int32_t device, uint32_t qid); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief init mbuf queue | |||
* @param [in] device the logical device id | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueInit(int32_t device); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief enqueue mbuf | |||
* @param [in] device the logical device id | |||
* @param [in] qid queue id | |||
* @param [in] mbuf enqueue mbuf | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueEnQueue(int32_t device, uint32_t qid, void *mbuf); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief dequeue mbuf | |||
* @param [in] device the logical device id | |||
* @param [in] qid queue id | |||
* @param [out] mbuf dequeue mbuf | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueDeQueue(int32_t device, uint32_t qid, void **mbuf); | |||
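A minimal usage sketch of the queue API above (editorial addition, not part of the diff). It assumes this header installs as rt_mem_queue.h, that RT_ERROR_NONE comes from base.h, and that the caller already holds an mbuf; demo_queue_roundtrip is a placeholder name.
#include <stdbool.h>
#include <string.h>
#include "rt_mem_queue.h"   // assumed install name of this header
static rtError_t demo_queue_roundtrip(void *mbuf) {
    int32_t dev = 0;                      // logical device id (placeholder)
    uint32_t qid = 0U;
    rtMemQueueAttr_t attr;
    (void)memset(&attr, 0, sizeof(attr));
    (void)strncpy(attr.name, "demo_q", RT_MQ_MAX_NAME_LEN - 1);
    attr.depth = 128U;                    // must be >= RT_MQ_DEPTH_MIN
    attr.workMode = RT_MQ_MODE_DEFAULT;   // push mode
    attr.flowCtrlFlag = false;
    attr.overWriteFlag = false;
    rtError_t ret = rtMemQueueInit(dev);  // per-device init
    if (ret != RT_ERROR_NONE) { return ret; }
    ret = rtMemQueueCreate(dev, &attr, &qid);
    if (ret != RT_ERROR_NONE) { return ret; }
    ret = rtMemQueueEnQueue(dev, qid, mbuf);   // producer side
    if (ret != RT_ERROR_NONE) { return ret; }
    void *out = NULL;
    ret = rtMemQueueDeQueue(dev, qid, &out);   // consumer side
    if (ret != RT_ERROR_NONE) { return ret; }
    return rtMemQueueDestroy(dev, qid);
}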
/** | |||
* @ingroup rt_mem_queue | |||
* @brief peek mbuf queue | |||
* @param [in] device the logical device id | |||
* @param [in] qid queue id | |||
* @param [out] bufLen length of mbuf in queue | |||
* @param [in] timeout peek timeout (ms), -1: block until peek succeeds | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueuePeek(int32_t device, uint32_t qid, size_t *bufLen, int32_t timeout); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief enqueue buff | |||
* @param [in] device the logical device id | |||
* @param [in] qid queue id | |||
* @param [in] inBuf enqueue buff | |||
* @param [in] timeout enqueue timeout (ms), -1: block until enqueue succeeds | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueEnQueueBuff(int32_t device, uint32_t qid, rtMemQueueBuff_t *inBuf, int32_t timeout); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief dequeue buff | |||
* @param [in] device the logical device id | |||
* @param [in] qid queue id | |||
* @param [out] outBuf dequeue buff | |||
* @param [in] timeout dequeue timeout (ms), -1: block until dequeue succeeds | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueDeQueueBuff(int32_t device, uint32_t qid, rtMemQueueBuff_t *outBuf, int32_t timeout); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief query queue status | |||
* @param [in] device: the logical device id | |||
* @param [in] cmd: query cmd | |||
* @param [in] inBuff: input buff | |||
* @param [in] inLen: the length of input | |||
* @param [in|out] outBuff: output buff | |||
* @param [in|out] outLen: the length of output | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueQuery(int32_t device, rtMemQueueQueryCmd_t cmd, const void *inBuff, uint32_t inLen, | |||
void *outBuff, uint32_t *outLen); | |||
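A hedged sketch of rtMemQueueQuery for the RT_MQ_QUERY_QUE_ATTR_OF_CUR_PROC command (editorial addition): per the enum comment above, the input is the 4-byte qid and the output is an rtMemQueueShareAttr_t; demo_query_share, dev and qid are placeholders.
static rtError_t demo_query_share(int32_t dev, uint32_t qid) {
    rtMemQueueShareAttr_t share;
    uint32_t outLen = (uint32_t)sizeof(share);
    rtError_t ret = rtMemQueueQuery(dev, RT_MQ_QUERY_QUE_ATTR_OF_CUR_PROC,
                                    &qid, (uint32_t)sizeof(qid), &share, &outLen);
    if ((ret == RT_ERROR_NONE) && (share.read == 1U)) {
        /* this process is allowed to dequeue from qid */
    }
    return ret;
}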
/** | |||
* @ingroup rt_mem_queue | |||
* @brief grant queue | |||
* @param [in] device: logic devid | |||
* @param [in] qid: queue id | |||
* @param [in] pid: pid | |||
* @param [in] attr: queue share attr | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueGrant(int32_t device, uint32_t qid, int32_t pid, rtMemQueueShareAttr_t *attr); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief attach queue | |||
* @param [in] device: logic devid | |||
* @param [in] qid: queue id | |||
* @param [in] timeOut: timeOut | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueAttach(int32_t device, uint32_t qid, int32_t timeOut); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief Commit the event to a specific process | |||
* @param [in] device: logic devid | |||
* @param [in] event: event summary info | |||
* @param [out] ack: event reply info | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtEschedSubmitEventSync(int32_t device, rtEschedEventSummary_t *event, | |||
rtEschedEventReply_t *ack); | |||
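A hedged sketch of a synchronous event submission (editorial addition): it posts a QS message to a destination process and waits for the ack; dstPid and the payload are placeholders, and only RT_MQ_SCHED_EVENT_QS_MSG is accepted as eventId per the struct comment above.
static rtError_t demo_post_qs_msg(int32_t dev, int32_t dstPid) {
    char payload[] = "ping";
    char ackBuf[64] = {0};
    rtEschedEventSummary_t evt = {0};
    rtEschedEventReply_t ack = {0};
    evt.pid = dstPid;                          // destination process id
    evt.grpId = 0U;
    evt.eventId = RT_MQ_SCHED_EVENT_QS_MSG;    // only supported eventId
    evt.subeventId = 0U;
    evt.msg = payload;
    evt.msgLen = (uint32_t)sizeof(payload);
    evt.dstEngine = RT_MQ_DST_ENGINE_ACPU_DEVICE;
    evt.policy = RT_SCHEDULE_POLICY_FIRST;
    ack.buf = ackBuf;
    ack.bufLen = (uint32_t)sizeof(ackBuf);
    return rtEschedSubmitEventSync(dev, &evt, &ack);  // ack.replyLen holds the reply size
}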
/** | |||
* @ingroup rt_mem_queue | |||
* @brief query device process id | |||
* @param [in] info: see struct rtBindHostpidInfo_t | |||
* @param [out] devPid: device process id | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtQueryDevPid(rtBindHostpidInfo_t *info, int32_t *devPid); | |||
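A hedged sketch of rtQueryDevPid (editorial addition): it maps a host pid to its bound device-side pid; the chipId, vfid, mode and cpType values are illustrative placeholders.
static rtError_t demo_query_dev_pid(int32_t hostPid, int32_t *devPid) {
    rtBindHostpidInfo_t info = {0};
    info.hostPid = hostPid;
    info.chipId = 0U;                  // placeholder chip id
    info.vfid = 0U;
    info.mode = 0;                     // online
    info.cpType = RT_DEV_PROCESS_CP1;  // custom-process type, placeholder
    return rtQueryDevPid(&info, devPid);
}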
/** | |||
* @ingroup rt_mem_queue | |||
* @brief device buff init | |||
* @param [in] cfg, init cfg | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMbufInit(rtMemBuffCfg_t *cfg); | |||
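A hedged sketch of buffer-pool initialization (editorial addition): it fills a single memory zone in rtMemBuffCfg_t and passes it to rtMbufInit; the zone sizes are illustrative assumptions, not recommendations.
static rtError_t demo_mbuf_pool_init(void) {
    rtMemBuffCfg_t buffCfg = {0};
    buffCfg.cfg[0].cfgId = 0U;
    buffCfg.cfg[0].totalSize = 64U * 1024U * 1024U;  // one 64 MB zone (illustrative)
    buffCfg.cfg[0].blkSize = 2U * 1024U * 1024U;     // 2 MB blocks, 2^n within (0, 2M]
    buffCfg.cfg[0].maxBufSize = 2U * 1024U * 1024U;  // largest single allocation from the zone
    return rtMbufInit(&buffCfg);                     // remaining zone entries stay zeroed
}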
/** | |||
* @ingroup rt_mem_queue | |||
* @brief alloc buff | |||
* @param [out] mbuf: addr of the allocated mbuf | |||
* @param [in] size: The amount of memory space requested | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *mbuf, uint64_t size); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief free buff | |||
* @param [in] mbuf: mbuf to be freed | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMbufFree(rtMbufPtr_t mbuf); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief get Data addr of Mbuf | |||
* @param [in] mbuf: Mbuf addr | |||
* @param [out] buf: Mbuf data addr | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMbufGetBuffAddr(rtMbufPtr_t mbuf, void **buf); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief get total Buffer size of Mbuf | |||
* @param [in] mbuf: Mbuf addr | |||
* @param [out] totalSize: total buffer size of Mbuf | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t mbuf, uint64_t *totalSize); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief Get the address and length of its user_data from the specified Mbuf | |||
* @param [in] mbuf: Mbuf addr | |||
* @param [out] priv: address of its user_data | |||
* @param [out] size: length of its user_data | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMbufGetPrivInfo(rtMbufPtr_t mbuf, void **priv, uint64_t *size); | |||
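A hedged sketch of the mbuf lifecycle (editorial addition): allocate an mbuf, locate its payload and user_data areas, then release it; the 4 KB size is a placeholder.
static rtError_t demo_mbuf_roundtrip(void) {
    rtMbufPtr_t mbuf = NULL;
    void *data = NULL;
    void *priv = NULL;
    uint64_t totalSize = 0U;
    uint64_t privSize = 0U;
    rtError_t ret = rtMbufAlloc(&mbuf, 4096U);
    if (ret != RT_ERROR_NONE) {
        return ret;
    }
    (void)rtMbufGetBuffAddr(mbuf, &data);             // payload area
    (void)rtMbufGetBuffSize(mbuf, &totalSize);        // usable payload size
    (void)rtMbufGetPrivInfo(mbuf, &priv, &privSize);  // per-mbuf user_data
    /* fill `data`, then either enqueue the mbuf or release it */
    return rtMbufFree(mbuf);
}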
// mem group | |||
typedef struct { | |||
uint64_t maxMemSize; // max buf size in grp, in KB. = 0 means no limit | |||
} rtMemGrpConfig_t; | |||
typedef struct { | |||
uint32_t admin : 1; // admin permission, can add other proc to grp | |||
uint32_t read : 1; // read only permission | |||
uint32_t write : 1; // read and write permission | |||
uint32_t alloc : 1; // alloc permission (have read and write permission) | |||
uint32_t rsv : 28; | |||
} rtMemGrpShareAttr_t; | |||
#define RT_MEM_GRP_QUERY_GROUPS_OF_PROCESS 1 // query process all grp | |||
typedef struct { | |||
int32_t pid; | |||
} rtMemGrpQueryByProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS | |||
typedef union { | |||
rtMemGrpQueryByProc_t grpQueryByProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS | |||
} rtMemGrpQueryInput_t; | |||
#define RT_MEM_GRP_NAME_LEN 32 // it must be same as driver define BUFF_GRP_NAME_LEN | |||
typedef struct { | |||
char groupName[RT_MEM_GRP_NAME_LEN]; // group name | |||
rtMemGrpShareAttr_t attr; // process in group attribute | |||
} rtMemGrpOfProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS | |||
typedef struct { | |||
rtMemGrpOfProc_t *groupsOfProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS | |||
size_t maxNum; // max number of result | |||
size_t resultNum; // if the number of results exceeds 'maxNum', only 'maxNum' results are filled in buffer | |||
} rtMemGrpQueryOutput_t; | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief create mem group | |||
* @attention null | |||
* @param [in] name, group name | |||
* @param [in] cfg, group cfg | |||
* @return 0 for success, others for fail | |||
*/ | |||
RTS_API rtError_t rtMemGrpCreate(const char *name, const rtMemGrpConfig_t *cfg); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief add process to group | |||
* @param [in] name, group name | |||
* @param [in] pid, process id | |||
* @param [in] attr, process permission in group | |||
* @return 0 for success, others for fail | |||
*/ | |||
RTS_API rtError_t rtMemGrpAddProc(const char *name, int32_t pid, const rtMemGrpShareAttr_t *attr); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief attach process to check its permission in group | |||
* @param [in] name, group name | |||
* @param [in] timeout, time out ms | |||
* @return 0 for success, others for fail | |||
*/ | |||
RTS_API rtError_t rtMemGrpAttach(const char *name, int32_t timeout); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief buff group query | |||
* @param [in] cmd, cmd type | |||
* @param [in] input, query input | |||
* @param [in|out] output, query output | |||
* @return 0 for success, others for fail | |||
*/ | |||
RTS_API rtError_t rtMemGrpQuery(int32_t cmd, const rtMemGrpQueryInput_t *input, rtMemGrpQueryOutput_t *output); | |||
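A hedged sketch of the memory-group API (editorial addition): create a group, grant a peer process alloc rights, then list the groups that process belongs to; "demo_grp" and peerPid are placeholders.
static rtError_t demo_mem_grp(int32_t peerPid) {
    rtMemGrpConfig_t grpCfg = {0};          // maxMemSize = 0: no limit
    rtMemGrpShareAttr_t grpAttr = {0};
    rtMemGrpOfProc_t results[8];
    rtMemGrpQueryInput_t in = {0};
    rtMemGrpQueryOutput_t out = {0};
    grpAttr.alloc = 1U;                     // alloc implies read + write
    rtError_t ret = rtMemGrpCreate("demo_grp", &grpCfg);
    if (ret != RT_ERROR_NONE) {
        return ret;
    }
    ret = rtMemGrpAddProc("demo_grp", peerPid, &grpAttr);
    if (ret != RT_ERROR_NONE) {
        return ret;
    }
    in.grpQueryByProc.pid = peerPid;
    out.groupsOfProc = results;
    out.maxNum = 8U;
    return rtMemGrpQuery(RT_MEM_GRP_QUERY_GROUPS_OF_PROCESS, &in, &out);
}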
#if defined(__cplusplus) | |||
} | |||
#endif | |||
#endif // CCE_RUNTIME_RT_MEM_QUEUE_H |
@@ -14,8 +14,8 @@ | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_MODEL_H__ | |||
#define __CCE_RUNTIME_MODEL_H__ | |||
#ifndef CCE_RUNTIME_RT_MODEL_H | |||
#define CCE_RUNTIME_RT_MODEL_H | |||
#include "base.h" | |||
@@ -42,7 +42,7 @@ typedef enum tagModelTaskType { | |||
RT_MODEL_TASK_NOTIFY_WAIT, | |||
RT_MODEL_TASK_REDUCE_ASYNC, | |||
RT_MODEL_TASK_RDMA_SEND, | |||
RT_MODEL_TASK_EVENT_RESET = 18, | |||
RT_MODEL_TASK_EVENT_RESET, | |||
RT_MODEL_TASK_MODEL_END_GRAPH, | |||
RT_MODEL_TASK_STREAM_SWITCH_N, | |||
RT_MODEL_TASK_RDMA_DB_SEND, | |||
@@ -66,16 +66,16 @@ typedef enum tagModelQueueFlag { | |||
RT_MODEL_OUTPUT_QUEUE = 1 | |||
} rtModelQueueFlag_t; | |||
#define EXECUTOR_NONE ((uint32_t)0x0) | |||
#define EXECUTOR_TS ((uint32_t)0x01) | |||
#define EXECUTOR_AICPU ((uint32_t)0x02) | |||
#define EXECUTOR_NONE (0x0U) | |||
#define EXECUTOR_TS (0x01U) | |||
#define EXECUTOR_AICPU (0x02U) | |||
/* | |||
* @ingroup rt_model | |||
* @brief debug flag for kernel exception dump | |||
*/ | |||
#define RT_DEBUG_FLAG_AICORE_OVERFLOW (0x1 << 0) | |||
#define RT_DEBUG_FLAG_ATOMIC_ADD_OVERFLOW (0x1 << 1) | |||
#define RT_DEBUG_FLAG_AICORE_OVERFLOW (0x1U << 0U) | |||
#define RT_DEBUG_FLAG_ATOMIC_ADD_OVERFLOW (0x1U << 1U) | |||
/** | |||
* @ingroup | |||
@@ -392,12 +392,12 @@ RTS_API rtError_t rtModelExecute(rtModel_t model, rtStream_t stream, uint32_t fl | |||
* @ingroup rt_model | |||
* @brief get the model's last persistent task id | |||
* @param [in] model model to execute | |||
* @param [out] taskid last task id of the model | |||
* @param [out] streamid last steam id of the model | |||
* @param [out] taskId last task id of the model | |||
* @param [out] streamId last stream id of the model | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtModelGetTaskId(rtModel_t model, uint32_t *taskid, uint32_t *streamid); | |||
RTS_API rtError_t rtModelGetTaskId(rtModel_t model, uint32_t *taskId, uint32_t *streamId); | |||
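A hedged one-line usage note (editorial addition): `model` is assumed to be a previously created and loaded rtModel_t handle.
uint32_t taskId = 0U;
uint32_t streamId = 0U;
rtError_t ret = rtModelGetTaskId(model, &taskId, &streamId);  // last persistent task/stream ids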
/** | |||
* @ingroup rt_model | |||
@@ -495,4 +495,4 @@ RTS_API rtError_t rtDebugUnRegister(rtModel_t model); | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_MODEL_H__ | |||
#endif // CCE_RUNTIME_RT_MODEL_H |
@@ -1,10 +1,10 @@ | |||
/* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||
* Description: | |||
* Description: the definition of stars | |||
*/ | |||
#ifndef __CCE_RUNTIME_STARS_H | |||
#define __CCE_RUNTIME_STARS_H | |||
#ifndef CCE_RUNTIME_RT_STARS_H | |||
#define CCE_RUNTIME_RT_STARS_H | |||
#include "base.h" | |||
@@ -84,4 +84,4 @@ RTS_API rtError_t rtCdqEnQueuePtrMode(const char *queName, uint32_t cdqeIndex, c | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_STARS_H | |||
#endif // CCE_RUNTIME_RT_STARS_H |
@@ -3,8 +3,8 @@ | |||
* Description: the definition of stars | |||
*/ | |||
#ifndef __CCE_RUNTIME_STARS_DEFINE__H | |||
#define __CCE_RUNTIME_STARS_DEFINE__H | |||
#ifndef CCE_RUNTIME_RT_STARS_DEFINE_H | |||
#define CCE_RUNTIME_RT_STARS_DEFINE_H | |||
#include "base.h" | |||
@@ -88,4 +88,4 @@ typedef struct tagFftsPlusSqe { | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_STARS_DEFINE__H | |||
#endif // CCE_RUNTIME_RT_STARS_DEFINE_H |
@@ -14,8 +14,8 @@ | |||
* limitations under the License. | |||
*/ | |||
#ifndef __CCE_RUNTIME_STREAM_H__ | |||
#define __CCE_RUNTIME_STREAM_H__ | |||
#ifndef CCE_RUNTIME_STREAM_H | |||
#define CCE_RUNTIME_STREAM_H | |||
#include "base.h" | |||
#include "event.h" | |||
@@ -28,27 +28,27 @@ extern "C" { | |||
* @ingroup stream_flags | |||
* @brief stream op bit flags | |||
*/ | |||
#define RT_STREAM_DEFAULT (0x00) | |||
#define RT_STREAM_PERSISTENT (0x01) | |||
#define RT_STREAM_FORCE_COPY (0x02) | |||
#define RT_STREAM_HUGE (0x04) | |||
#define RT_STREAM_AICPU (0x08) | |||
#define RT_STREAM_FORBIDDEN_DEFAULT (0x10) | |||
#define RT_STREAM_HEAD (0x20) | |||
#define RT_STREAM_PRIMARY_DEFAULT (0x40) | |||
#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80) | |||
#define RT_STREAM_DEFAULT (0x00U) | |||
#define RT_STREAM_PERSISTENT (0x01U) | |||
#define RT_STREAM_FORCE_COPY (0x02U) | |||
#define RT_STREAM_HUGE (0x04U) | |||
#define RT_STREAM_AICPU (0x08U) | |||
#define RT_STREAM_FORBIDDEN_DEFAULT (0x10U) | |||
#define RT_STREAM_HEAD (0x20U) | |||
#define RT_STREAM_PRIMARY_DEFAULT (0x40U) | |||
#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80U) | |||
/** | |||
* @ingroup stream_type | |||
* @brief stream type | |||
*/ | |||
#define RT_NORMAL_STREAM (0x00) | |||
#define RT_HUGE_STREAM (0x01) | |||
#define RT_NORMAL_STREAM (0x00U) | |||
#define RT_HUGE_STREAM (0x01U) | |||
/** | |||
* default priority level when creating a stream | |||
*/ | |||
#define RT_STREAM_PRIORITY_DEFAULT (0) | |||
#define RT_STREAM_PRIORITY_DEFAULT (0U) | |||
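A hedged sketch of combining the stream bit flags above (editorial addition). It assumes rtStreamCreateWithFlags, declared elsewhere in this header, takes (stream, priority, flags); if the actual signature differs, adjust accordingly.
rtStream_t stream = NULL;
uint32_t flags = RT_STREAM_HUGE | RT_STREAM_FORCE_COPY;   // bit flags are OR-combined
rtError_t ret = rtStreamCreateWithFlags(&stream, RT_STREAM_PRIORITY_DEFAULT, flags);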
/** | |||
* @ingroup dvrt_stream | |||
@@ -215,4 +215,4 @@ RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stream); | |||
} | |||
#endif | |||
#endif // __CCE_RUNTIME_STREAM_H__ | |||
#endif // CCE_RUNTIME_STREAM_H |
@@ -1,59 +1,59 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef _PLOG_H_ | |||
#define _PLOG_H_ | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
#ifndef LINUX | |||
#define LINUX 0 | |||
#endif // LINUX | |||
#ifndef WIN | |||
#define WIN 1 | |||
#endif | |||
#ifndef OS_TYPE | |||
#define OS_TYPE 0 | |||
#endif // OS_TYPE | |||
#if (OS_TYPE == LINUX) | |||
#define DLL_EXPORT __attribute__((visibility("default"))) | |||
#else | |||
#define DLL_EXPORT _declspec(dllexport) | |||
#endif | |||
/** | |||
* @ingroup plog | |||
* @brief DlogReportInitialize: init log in service process before all device setting. | |||
* @return: 0: SUCCEED, others: FAILED | |||
*/ | |||
DLL_EXPORT int DlogReportInitialize(); | |||
/** | |||
* @ingroup plog | |||
* @brief DlogReportFinalize: release log resource in service process after all device reset. | |||
* @return: 0: SUCCEED, others: FAILED | |||
*/ | |||
DLL_EXPORT int DlogReportFinalize(); | |||
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
#endif // D_PLOG_H_ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef _PLOG_H_ | |||
#define _PLOG_H_ | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
#ifndef LINUX | |||
#define LINUX 0 | |||
#endif // LINUX | |||
#ifndef WIN | |||
#define WIN 1 | |||
#endif | |||
#ifndef OS_TYPE | |||
#define OS_TYPE 0 | |||
#endif // OS_TYPE | |||
#if (OS_TYPE == LINUX) | |||
#define DLL_EXPORT __attribute__((visibility("default"))) | |||
#else | |||
#define DLL_EXPORT _declspec(dllexport) | |||
#endif | |||
/** | |||
* @ingroup plog | |||
* @brief DlogReportInitialize: initialize logging in the service process before any device is set up. | |||
* @return: 0: SUCCEED, others: FAILED | |||
*/ | |||
DLL_EXPORT int DlogReportInitialize(void); | |||
/** | |||
* @ingroup plog | |||
* @brief DlogReportFinalize: release logging resources in the service process after all devices are reset. | |||
* @return: 0: SUCCEED, others: FAILED | |||
*/ | |||
DLL_EXPORT int DlogReportFinalize(void); | |||
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
#endif // _PLOG_H_ |
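A hedged sketch of the plog init/finalize pairing (editorial addition): per the comments above, 0 means success, and the pair should bracket all device setup and reset.
if (DlogReportInitialize() != 0) {
    /* logging is unavailable; decide whether to continue or abort */
}
/* ... set up devices, run the workload, reset devices ... */
(void)DlogReportFinalize();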
@@ -22,18 +22,7 @@ | |||
#define PROF_TASK_TIME 0x00000002 | |||
#define PROF_AICORE_METRICS 0x00000004 | |||
#define PROF_AICPU_TRACE 0x00000008 | |||
#define PROF_MODEL_EXECUTE 0x00000010 | |||
#define PROF_RUNTIME_API 0x00000020 | |||
#define PROF_RUNTIME_TRACE 0x00000040 | |||
#define PROF_SCHEDULE_TIMELINE 0x00000080 | |||
#define PROF_SCHEDULE_TRACE 0x00000100 | |||
#define PROF_AIVECTORCORE_METRICS 0x00000200 | |||
#define PROF_SUBTASK_TIME 0x00000400 | |||
#define PROF_TRAINING_TRACE 0x00000800 | |||
#define PROF_HCCL_TRACE 0x00001000 | |||
#define PROF_TASK_TRACE 0x00001852 | |||
#define PROF_L2CACHE 0x00000010 | |||
// system profiling switch | |||
#define PROF_CPU 0x00010000 | |||
@@ -44,6 +33,19 @@ | |||
#define PROF_SYS_AICORE_SAMPLE 0x00200000 | |||
#define PROF_AIVECTORCORE_SAMPLE 0x00400000 | |||
#define PROF_MODEL_EXECUTE 0x0000001000000 | |||
#define PROF_RUNTIME_API 0x0000002000000 | |||
#define PROF_RUNTIME_TRACE 0x0000004000000 | |||
#define PROF_SCHEDULE_TIMELINE 0x0000008000000 | |||
#define PROF_SCHEDULE_TRACE 0x0000010000000 | |||
#define PROF_AIVECTORCORE_METRICS 0x0000020000000 | |||
#define PROF_SUBTASK_TIME 0x0000040000000 | |||
#define PROF_TRAINING_TRACE 0x0000080000000 | |||
#define PROF_HCCL_TRACE 0x0000100000000 | |||
#define PROF_TASK_TRACE 0x0000185000002 | |||
#define PROF_MODEL_LOAD 0x8000000000000000 | |||
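A hedged illustration (editorial addition): the data-type switches above are bit flags, so a profiling configuration is just their bitwise OR; a uint64_t is needed to cover PROF_MODEL_LOAD in bit 63.
uint64_t dataTypeConfig = PROF_TASK_TIME | PROF_AICORE_METRICS |
                          PROF_MODEL_EXECUTE | PROF_MODEL_LOAD;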
// DataTypeConfig MASK | |||
@@ -51,16 +53,7 @@ | |||
#define PROF_TASK_TIME_MASK 0x00000002 | |||
#define PROF_AICORE_METRICS_MASK 0x00000004 | |||
#define PROF_AICPU_TRACE_MASK 0x00000008 | |||
#define PROF_MODEL_EXECUTE_MASK 0x00000010 | |||
#define PROF_RUNTIME_API_MASK 0x00000020 | |||
#define PROF_RUNTIME_TRACE_MASK 0x00000040 | |||
#define PROF_SCHEDULE_TIMELINE_MASK 0x00000080 | |||
#define PROF_SCHEDULE_TRACE_MASK 0x00000100 | |||
#define PROF_AIVECTORCORE_METRICS_MASK 0x00000200 | |||
#define PROF_SUBTASK_TIME_MASK 0x00000400 | |||
#define PROF_TRAINING_TRACE_MASK 0x00000800 | |||
#define PROF_HCCL_TRACE_MASK 0x00001000 | |||
#define PROF_L2CACHE_MASK 0x00000010 | |||
// system profiling mask | |||
#define PROF_CPU_MASK 0x00010000 | |||
@@ -71,20 +64,27 @@ | |||
#define PROF_SYS_AICORE_SAMPLE_MASK 0x00200000 | |||
#define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000 | |||
#define PROF_MODEL_LOAD_MASK 0x8000000000000000 | |||
#define PROF_MODEL_EXECUTE_MASK 0x0000001000000 | |||
#define PROF_RUNTIME_API_MASK 0x0000002000000 | |||
#define PROF_RUNTIME_TRACE_MASK 0x0000004000000 | |||
#define PROF_SCHEDULE_TIMELINE_MASK 0x0000008000000 | |||
#define PROF_SCHEDULE_TRACE_MASK 0x0000010000000 | |||
#define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000 | |||
#define PROF_SUBTASK_TIME_MASK 0x0000040000000 | |||
#ifndef OS_TYPE | |||
#define OS_TYPE 0 | |||
#endif // OS_TYPE | |||
#define PROF_TRAINING_TRACE_MASK 0x0000080000000 | |||
#define PROF_HCCL_TRACE_MASK 0x0000100000000 | |||
#define PROF_MODEL_LOAD_MASK 0x8000000000000000 | |||
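A hedged illustration (editorial addition): a consumer of a dataTypeConfig value (such as the one built above) tests individual features with the corresponding *_MASK macros.
if ((dataTypeConfig & PROF_HCCL_TRACE_MASK) != 0U) {
    /* HCCL tracing was requested */
}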
#if (OS_TYPE != LINUX) | |||
#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) | |||
#define MSVP_PROF_API __declspec(dllexport) | |||
#else | |||
#define MSVP_PROF_API __attribute__((visibility("default"))) | |||
#endif | |||
#include <cstdint> | |||
#include <stddef.h> | |||
#include <cstddef> | |||
namespace Msprofiler { | |||
namespace Api { | |||
@@ -24,7 +24,7 @@ | |||
extern "C" { | |||
#endif // __cplusplus | |||
#if (OS_TYPE != LINUX) | |||
#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) | |||
#define MSVP_PROF_API __declspec(dllexport) | |||
#else | |||
#define MSVP_PROF_API __attribute__((visibility("default"))) | |||
@@ -16,11 +16,8 @@ | |||
#ifndef MSPROF_ENGINE_PROF_REPORTER_H_ | |||
#define MSPROF_ENGINE_PROF_REPORTER_H_ | |||
#ifndef OS_TYPE | |||
#define OS_TYPE 0 | |||
#endif // OS_TYPE | |||
#if (OS_TYPE != LINUX) | |||
#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) | |||
#define MSVP_PROF_API __declspec(dllexport) | |||
#else | |||
#define MSVP_PROF_API __attribute__((visibility("default"))) | |||