|
|
/*
 * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
 * Description: ffts interface
 */
|
|
|
|
|
|
|
#ifndef __CCE_RUNTIME_FFTS_H
#define __CCE_RUNTIME_FFTS_H

#include "base.h"

// Declarations below get C linkage when this header is compiled as C++,
// unless COMPILE_OMG_PACKAGE is defined.
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif
|
|
|
|
|
|
|
// Capacities of the fixed-size tables declared in this header.
#define RT_FFTS_MAX_SUB_TASK_NUM 32U              // length of subTask[] in rtFftsTaskInfo_t
#define RT_FFTS_MAX_TICKET_CACHE_NUM 64U          // length of ticketCache[] in rtFftsTaskInfo_t
#define RT_FFTS_MAX_MANUAL_THREAD_NUM 16U         // length of the per-thread arrays in the manual thread structs
#define RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK 8U   // length of the per-sub-task ticket cache / prefetch tables
#define RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN 32U     // length of dependency[] in rtManualThreadDependency_t
|
|
|
|
|
|
|
// FFTS thread mode; stored in rtFftsTaskInfo_t.fftsType.
// The numeric values are the same as the FFTS definition.
typedef enum tagFftsType {
    RT_FFTS_TYPE_AUTO_THREAD = 2,    // ffts auto thread mode, same as ffts define
    RT_FFTS_TYPE_MANUAL_THREAD = 3,  // ffts manual thread mode, same as ffts define
} rtFftsType_t;
|
|
|
|
|
|
|
// Kind of an FFTS sub task; stored in rtFftsSubTaskInfo_t.subTaskType.
typedef enum tagFftsSubTaskType {
    RT_FFTS_SUB_TASK_TYPE_AIC = 0,
    RT_FFTS_SUB_TASK_TYPE_AIV = 1,
    RT_FFTS_SUB_TASK_TYPE_NOP = 2,
    RT_FFTS_SUB_TASK_TYPE_NOTIFY_WAIT = 3,
    RT_FFTS_SUB_TASK_TYPE_NOTIFY_RECORD = 4,
    RT_FFTS_SUB_TASK_TYPE_WRITE_VALUE = 5,
    RT_FFTS_SUB_TASK_TYPE_MIX_AIC = 6,
    RT_FFTS_SUB_TASK_TYPE_MIX_AIV = 7,
    RT_FFTS_SUB_TASK_TYPE_SDMA = 8,
    RT_FFTS_SUB_TASK_TYPE_RESERVED,  // no explicit value: one past SDMA, i.e. 9
} rtFftsSubTaskType_t;
|
|
|
|
|
|
|
// One DMU descriptor for manual thread mode. Instances are referenced through
// prefetchList (rtManualThreadAicAivInfo_t) and dmuList (rtManualThreadCacheInfo_t).
// NOTE(review): the field names suggest a two-level (outer/inner) strided region;
// units of the length/stride fields are not stated here - confirm against the FFTS spec.
typedef struct tagManualThreadDmuInfo {
    uint64_t dataAddr;  // device mem
    uint16_t numOuter;
    uint16_t numInner;
    uint32_t strideOuter;
    uint32_t lenInner;
    uint32_t strideInner;
} rtManualThreadDmuInfo_t;
|
|
|
|
|
|
|
typedef struct tagManualThreadDependency { |
|
|
|
uint8_t dependency[RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN]; |
|
|
|
} rtManualThreadDependency_t; |
|
|
|
|
|
|
|
typedef struct tagManualThreadAicAivInfo { |
|
|
|
uint64_t taskParamAddr; // device mem |
|
|
|
uint16_t taskParamOffset; |
|
|
|
// when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 |
|
|
|
// when satMode=0 and FP16 computation with none INF inputs overflows/underflows, |
|
|
|
// results will be saturated to +/-MAX of FP16 |
|
|
|
uint8_t satMode; |
|
|
|
uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode 3:reserved |
|
|
|
uint8_t iCachePrefetchCnt; // units is 2K |
|
|
|
uint8_t prefetchEnableBitmap; // 8 bit bitmap 1 0 1 0 |
|
|
|
uint8_t prefetchOnceBitmap; // 8 bit bitmap 1 0 1 0 |
|
|
|
uint16_t prefetchOnceDmuNum; // prefetch_once_dmu_descriptor_index in ffts |
|
|
|
// num: thread0_prefetch_dmu_descriptor_index – prefetch_once_dmu_descriptor_index |
|
|
|
uint16_t threadPrefetchDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; // max valid is threadDim |
|
|
|
uint16_t threadBlkDim[RT_FFTS_MAX_MANUAL_THREAD_NUM]; |
|
|
|
const char *threadTaskFuncStub[RT_FFTS_MAX_MANUAL_THREAD_NUM]; |
|
|
|
|
|
|
|
rtManualThreadDmuInfo_t *prefetchList; // dmu desc 0-64k, length is the last threadPrefetchDmuIdx[threadDim-1] |
|
|
|
rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; |
|
|
|
} rtManualThreadAicAivInfo_t; |
|
|
|
|
|
|
|
// One prefetch source for auto thread mode; element type of srcPrefetch[]
// in rtAutoThreadAicAivInfo_t.
// NOTE(review): "tail"/"nonTail" presumably distinguish the last thread from the
// others; units of the length fields are not stated here - confirm.
typedef struct tagAutoThreadPrefetch {
    uint64_t dataAddr;  // device mem
    uint32_t dataAddrOffset;
    uint32_t nonTailDataLen;
    uint32_t tailDataLen;
} rtAutoThreadPrefetch_t;
|
|
|
|
|
|
|
typedef struct tagAutoThreadAicAivInfo { |
|
|
|
uint64_t taskParamAddr; // device mem |
|
|
|
uint16_t taskParamOffset; |
|
|
|
// when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 |
|
|
|
// when satMode=0 and FP16 computation with none INF inputs overflows/underflows, results will be saturated to +/-MAX of FP16 |
|
|
|
uint8_t satMode; |
|
|
|
uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode 3:reserved |
|
|
|
uint8_t iCachePrefetchCnt; // units is 2K |
|
|
|
uint8_t prefetchEnableBitmap; // 8 bit bitmap |
|
|
|
uint8_t prefetchOnceBitmap; // 8 bit bitmap |
|
|
|
|
|
|
|
uint16_t tailBlkDim; |
|
|
|
uint16_t nonTailBlkDim; |
|
|
|
|
|
|
|
const char *nonTailTaskFuncStub; |
|
|
|
const char *tailTaskFuncStub; |
|
|
|
|
|
|
|
// for prefetch, valid num is prefetchEnableBitmap bit count. |
|
|
|
// if prefetchEnableBitmap='00010011', need prefetch number is 3, srcPrefetch is only 0, 1, 2 is valid |
|
|
|
rtAutoThreadPrefetch_t srcPrefetch[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; |
|
|
|
} rtAutoThreadAicAivInfo_t; |
|
|
|
|
|
|
|
// Ticket cache description for auto thread mode; this is the autoThreadCache
// member of the custom union in rtTicketCache_t.
// NOTE(review): units of the length fields are not stated here - confirm.
typedef struct tagAutoThreadCacheInfo {
    uint64_t dataAddr;  // device mem
    uint32_t dataAddrOffset;
    uint32_t nonTailDataLen;
    uint32_t tailDataLen;
    uint16_t ticketCacheRefCnt;
} rtAutoThreadCacheInfo_t;
|
|
|
|
|
|
|
typedef struct tagManualThreadCacheInfo { |
|
|
|
rtManualThreadDmuInfo_t *dmuList; // 0-64k |
|
|
|
uint16_t dmuNum; |
|
|
|
uint16_t sliceDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; |
|
|
|
uint16_t ticketCacheRefCntTbl[RT_FFTS_MAX_MANUAL_THREAD_NUM]; |
|
|
|
} rtManualThreadCacheInfo_t; |
|
|
|
|
|
|
|
// Cache maintenance operation; stored in rtTicketCache_t.cacheOption.
typedef enum tagCacheOp {
    RT_CACHE_OP_NONE = 0,
    RT_CACHE_OP_FLUSH = 1,
    RT_CACHE_OP_INVALIDATE = 2,
    RT_CACHE_OP_WRITE_BACK = 3
} rtCacheOp_t;
|
|
|
|
|
|
|
typedef struct tagTicketCache { |
|
|
|
rtCacheOp_t cacheOption; |
|
|
|
uint8_t ticketCacheWindow; |
|
|
|
union { |
|
|
|
rtAutoThreadCacheInfo_t autoThreadCache; |
|
|
|
rtManualThreadCacheInfo_t manualThreadCache; |
|
|
|
} custom; |
|
|
|
} rtTicketCache_t; |
|
|
|
|
|
|
|
typedef struct tagManualThreadNopInfo { |
|
|
|
// depend srcTickCacheVldBitmap in rtFftsSubTaskInfo_t |
|
|
|
rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; |
|
|
|
} rtManualThreadNopInfo_t; |
|
|
|
|
|
|
|
typedef struct tagFftsSubTaskInfo { |
|
|
|
rtFftsSubTaskType_t subTaskType; |
|
|
|
uint16_t threadDim; |
|
|
|
uint8_t dstTickCacheVldBitmap; |
|
|
|
uint8_t srcTickCacheVldBitmap; |
|
|
|
uint8_t srcDataOutOfSubGraphBitmap; |
|
|
|
uint8_t dstTickCacheID[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; |
|
|
|
uint8_t srcTickCacheID[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; |
|
|
|
union { |
|
|
|
rtAutoThreadAicAivInfo_t autoThreadAicAiv; |
|
|
|
rtManualThreadAicAivInfo_t manualThreadAicAiv; |
|
|
|
rtManualThreadNopInfo_t manualThreadNop; |
|
|
|
} custom; |
|
|
|
} rtFftsSubTaskInfo_t; |
|
|
|
|
|
|
|
// Task-level FFTS descriptor settings; stored in rtFftsTaskInfo_t.fftsDesc.
// Every field is a single byte.
typedef struct tagFftsDescInfo {
    uint8_t tm;             // thread subtask kickstart mode, 0: order, 1: disorder
    uint8_t di;             // discard invalidate
    uint8_t dw;             // discard write back
    uint8_t df;             // discard flush
    uint8_t dataSplitUnit;  // split source or ticket cache by 2^dataSplitUnit MB
    uint8_t prefetchOstNum;
    uint8_t cacheMaintainOstNum;
    uint8_t aicPrefetchUpper;
    uint8_t aicPrefetchLower;
    uint8_t aivPrefetchUpper;
    uint8_t aivPrefetchLower;
} rtFftsDescInfo_t;
|
|
|
|
|
|
|
typedef struct tagFftsTaskInfo { |
|
|
|
rtFftsType_t fftsType; |
|
|
|
uint16_t subTaskNum; |
|
|
|
uint16_t tickCacheNum; |
|
|
|
rtFftsDescInfo_t fftsDesc; |
|
|
|
// sub task desc, real num is subTaskNum |
|
|
|
rtFftsSubTaskInfo_t subTask[RT_FFTS_MAX_SUB_TASK_NUM]; |
|
|
|
|
|
|
|
// ticket cache, real number is tickCacheNum. |
|
|
|
rtTicketCache_t ticketCache[RT_FFTS_MAX_TICKET_CACHE_NUM]; |
|
|
|
} rtFftsTaskInfo_t; |
|
|
|
|
|
|
|
/*
 * Declaration of the FFTS task launch entry point.
 * fftsTaskInfo: pointer to the task description (see rtFftsTaskInfo_t).
 * stream:       stream handle; rtStream_t is not declared in this header.
 * Returns an rtError_t; that type and its values are not declared in this header.
 * NOTE(review): presumably submits the described task to `stream`; whether
 * fftsTaskInfo may be NULL or is modified by the call is not visible here - confirm.
 */
RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream);
|
|
|
|
|
|
|
// Close the extern "C" block opened at the top of this header.
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif

#endif  // __CCE_RUNTIME_FFTS_H