diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts.h b/third_party/fwkacllib/inc/runtime/rt_ffts.h new file mode 100644 index 00000000..bae5a54d --- /dev/null +++ b/third_party/fwkacllib/inc/runtime/rt_ffts.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * Description: ffts interface + */ + +#ifndef __CCE_RUNTIME_FFTS_H +#define __CCE_RUNTIME_FFTS_H + +#include "base.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +#define RT_FFTS_MAX_SUB_TASK_NUM 32U +#define RT_FFTS_MAX_TICKET_CACHE_NUM 64U +#define RT_FFTS_MAX_MANUAL_THREAD_NUM 16U +#define RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK 8U +#define RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN 32U + +typedef enum tagFftsType { + RT_FFTS_TYPE_AUTO_THREAD = 2, // ffts auto thread mode, same as ffts define + RT_FFTS_TYPE_MANUAL_THREAD = 3, // ffts manual thread mode, same as ffts define +} rtFftsType_t; + +typedef enum tagFftsSubTaskType { + RT_FFTS_SUB_TASK_TYPE_AIC = 0, + RT_FFTS_SUB_TASK_TYPE_AIV = 1, + RT_FFTS_SUB_TASK_TYPE_NOP = 2, + RT_FFTS_SUB_TASK_TYPE_NOTIFY_WAIT = 3, + RT_FFTS_SUB_TASK_TYPE_NOTIFY_RECORD = 4, + RT_FFTS_SUB_TASK_TYPE_WRITE_VALUE = 5, + RT_FFTS_SUB_TASK_TYPE_MIX_AIC = 6, + RT_FFTS_SUB_TASK_TYPE_MIX_AIV = 7, + RT_FFTS_SUB_TASK_TYPE_SDMA = 8, + RT_FFTS_SUB_TASK_TYPE_RESERVED, +} rtFftsSubTaskType_t; + +typedef struct tagManualThreadDmuInfo { + uint64_t dataAddr; // device mem + uint16_t numOuter; + uint16_t numInner; + uint32_t strideOuter; + uint32_t lenInner; + uint32_t strideInner; +} rtManualThreadDmuInfo_t; + +typedef struct tagManualThreadDependency { + uint8_t dependency[RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN]; +} rtManualThreadDependency_t; + +typedef struct tagManualThreadAicAivInfo { + uint64_t taskParamAddr; // device mem + uint16_t taskParamOffset; + // when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 + // when satMode=0 and FP16 computation with none INF inputs overflows/underflows, + // results will be saturated to +/-MAX of FP16 + uint8_t satMode; + uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode 3:reserved + uint8_t iCachePrefetchCnt; // units is 2K + uint8_t prefetchEnableBitmap; // 8 bit bitmap 1 0 1 0 + uint8_t prefetchOnceBitmap; // 8 bit bitmap 1 0 1 0 + uint16_t prefetchOnceDmuNum; // prefetch_once_dmu_descriptor_index in ffts + // num: thread0_prefetch_dmu_descriptor_index – prefetch_once_dmu_descriptor_index + uint16_t threadPrefetchDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; // max valid is threadDim + uint16_t threadBlkDim[RT_FFTS_MAX_MANUAL_THREAD_NUM]; + const char *threadTaskFuncStub[RT_FFTS_MAX_MANUAL_THREAD_NUM]; + + rtManualThreadDmuInfo_t *prefetchList; // dmu desc 0-64k, length is the last threadPrefetchDmuIdx[threadDim-1] + rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; +} rtManualThreadAicAivInfo_t; + +typedef struct tagAutoThreadPrefetch { + uint64_t dataAddr; // device mem + uint32_t dataAddrOffset; + uint32_t nonTailDataLen; + uint32_t tailDataLen; +} rtAutoThreadPrefetch_t; + +typedef struct tagAutoThreadAicAivInfo { + uint64_t taskParamAddr; // device mem + uint16_t taskParamOffset; + // when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 + // when satMode=0 and FP16 computation with none INF inputs overflows/underflows, results will be saturated to +/-MAX of FP16 + uint8_t satMode; + uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode 3:reserved + uint8_t iCachePrefetchCnt; // units is 2K + uint8_t prefetchEnableBitmap; // 8 bit bitmap + uint8_t prefetchOnceBitmap; // 8 bit bitmap + + uint16_t tailBlkDim; + uint16_t nonTailBlkDim; + + const char *nonTailTaskFuncStub; + const char *tailTaskFuncStub; + + // for prefetch, valid num is prefetchEnableBitmap bit count. + // if prefetchEnableBitmap='00010011', need prefetch number is 3, srcPrefetch is only 0, 1, 2 is valid + rtAutoThreadPrefetch_t srcPrefetch[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; +} rtAutoThreadAicAivInfo_t; + +typedef struct tagAutoThreadCacheInfo { + uint64_t dataAddr; // device mem + uint32_t dataAddrOffset; + uint32_t nonTailDataLen; + uint32_t tailDataLen; + uint16_t ticketCacheRefCnt; +} rtAutoThreadCacheInfo_t; + +typedef struct tagManualThreadCacheInfo { + rtManualThreadDmuInfo_t *dmuList; // 0-64k + uint16_t dmuNum; + uint16_t sliceDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; + uint16_t ticketCacheRefCntTbl[RT_FFTS_MAX_MANUAL_THREAD_NUM]; +} rtManualThreadCacheInfo_t; + +typedef enum tagCacheOp { + RT_CACHE_OP_NONE = 0, + RT_CACHE_OP_FLUSH = 1, + RT_CACHE_OP_INVALIDATE = 2, + RT_CACHE_OP_WRITE_BACK = 3, +} rtCacheOp_t; + +typedef struct tagTicketCache { + rtCacheOp_t cacheOption; + uint8_t ticketCacheWindow; + union { + rtAutoThreadCacheInfo_t autoThreadCache; + rtManualThreadCacheInfo_t manualThreadCache; + } custom; +} rtTicketCache_t; + +typedef struct tagManualThreadNopInfo { + // depend srcTickCacheVldBitmap in rtFftsSubTaskInfo_t + rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; +} rtManualThreadNopInfo_t; + +typedef struct tagFftsSubTaskInfo { + rtFftsSubTaskType_t subTaskType; + uint16_t threadDim; + uint8_t dstTickCacheVldBitmap; + uint8_t srcTickCacheVldBitmap; + uint8_t srcDataOutOfSubGraphBitmap; + uint8_t dstTickCacheID[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; + uint8_t srcTickCacheID[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; + union { + rtAutoThreadAicAivInfo_t autoThreadAicAiv; + rtManualThreadAicAivInfo_t manualThreadAicAiv; + rtManualThreadNopInfo_t manualThreadNop; + } custom; +} rtFftsSubTaskInfo_t; + +typedef struct tagFftsDescInfo { + uint8_t tm; // thread subtask kickstart mode, 0:order, 1:disorder + uint8_t di; // discard invalidate + uint8_t dw; // discard write back + uint8_t df; // discard flush + uint8_t dataSplitUnit; // split source or ticket cache by 2^dataSplitUnit MB + uint8_t prefetchOstNum; + uint8_t cacheMaintainOstNum; + uint8_t aicPrefetchUpper; + uint8_t aicPrefetchLower; + uint8_t aivPrefetchUpper; + uint8_t aivPrefetchLower; +} rtFftsDescInfo_t; + +typedef struct tagFftsTaskInfo { + rtFftsType_t fftsType; + uint16_t subTaskNum; + uint16_t tickCacheNum; + rtFftsDescInfo_t fftsDesc; + // sub task desc, real num is subTaskNum + rtFftsSubTaskInfo_t subTask[RT_FFTS_MAX_SUB_TASK_NUM]; + + // ticket cache, real number is tickCacheNum. + rtTicketCache_t ticketCache[RT_FFTS_MAX_TICKET_CACHE_NUM]; +} rtFftsTaskInfo_t; + +RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream); + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif +#endif // __CCE_RUNTIME_FFTS_H