Browse Source

code_sync_0604

tags/v1.3.0
dingpeifei 4 years ago
parent
commit
bc93b5b742
1 changed files with 184 additions and 0 deletions
  1. +184
    -0
      third_party/fwkacllib/inc/runtime/rt_ffts.h

+ 184
- 0
third_party/fwkacllib/inc/runtime/rt_ffts.h View File

@@ -0,0 +1,184 @@
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
* Description: ffts interface
*/

#ifndef __CCE_RUNTIME_FFTS_H
#define __CCE_RUNTIME_FFTS_H

#include "base.h"

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
extern "C" {
#endif

// Capacity limits of the fixed-size tables used by the FFTS descriptors declared in this header.
#define RT_FFTS_MAX_SUB_TASK_NUM             32U  // capacity of rtFftsTaskInfo_t.subTask
#define RT_FFTS_MAX_TICKET_CACHE_NUM         64U  // capacity of rtFftsTaskInfo_t.ticketCache
#define RT_FFTS_MAX_MANUAL_THREAD_NUM        16U  // capacity of the per-thread tables of the manual thread structs
#define RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK 8U   // capacity of the per-subtask ticket cache / prefetch / depend tables
#define RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN    32U  // number of bytes in rtManualThreadDependency_t.dependency

// Threading mode of an FFTS task (stored in rtFftsTaskInfo_t.fftsType).
// The numeric values are the same as the ffts definition, so they start at 2 and must not be renumbered.
typedef enum tagFftsType {
    RT_FFTS_TYPE_AUTO_THREAD = 2,    // ffts auto thread mode
    RT_FFTS_TYPE_MANUAL_THREAD = 3,  // ffts manual thread mode
} rtFftsType_t;

// Kind of a single FFTS sub task (stored in rtFftsSubTaskInfo_t.subTaskType).
// Values 0..8 are assigned explicitly; RESERVED has no initializer and therefore takes the next value (9).
typedef enum tagFftsSubTaskType {
    RT_FFTS_SUB_TASK_TYPE_AIC = 0,
    RT_FFTS_SUB_TASK_TYPE_AIV = 1,
    RT_FFTS_SUB_TASK_TYPE_NOP = 2,
    RT_FFTS_SUB_TASK_TYPE_NOTIFY_WAIT = 3,
    RT_FFTS_SUB_TASK_TYPE_NOTIFY_RECORD = 4,
    RT_FFTS_SUB_TASK_TYPE_WRITE_VALUE = 5,
    RT_FFTS_SUB_TASK_TYPE_MIX_AIC = 6,
    RT_FFTS_SUB_TASK_TYPE_MIX_AIV = 7,
    RT_FFTS_SUB_TASK_TYPE_SDMA = 8,
    RT_FFTS_SUB_TASK_TYPE_RESERVED,  // keep last
} rtFftsSubTaskType_t;

// One DMU descriptor for manual thread mode. It is the element type of
// rtManualThreadAicAivInfo_t.prefetchList and rtManualThreadCacheInfo_t.dmuList.
// NOTE(review): the outer/inner fields look like a two-level strided region description;
// the exact meaning and units are not stated in this header - confirm against the ffts spec.
typedef struct tagManualThreadDmuInfo {
    uint64_t dataAddr;  // device mem
    uint16_t numOuter;
    uint16_t numInner;
    uint32_t strideOuter;
    uint32_t lenInner;
    uint32_t strideInner;
} rtManualThreadDmuInfo_t;

// One row of a manual thread source dependency table (element type of the srcDepTbl arrays in this header).
// NOTE(review): the encoding of the individual bytes is not described here - confirm against the ffts spec.
typedef struct tagManualThreadDependency {
    uint8_t dependency[RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN];
} rtManualThreadDependency_t;

// Manual thread mode parameters of an AIC/AIV sub task
// (the manualThreadAicAiv member of rtFftsSubTaskInfo_t.custom).
typedef struct tagManualThreadAicAivInfo {
    uint64_t taskParamAddr;  // device mem
    uint16_t taskParamOffset;
    // Handling of FP16 computations that overflow/underflow with non-INF inputs:
    //   satMode=1: results will be +/-INF of FP16
    //   satMode=0: results will be saturated to +/-MAX of FP16
    uint8_t satMode;
    uint8_t scheduleMode;          // 0: normal mode, 1: batch mode, 2: sync mode, 3: reserved
    uint8_t iCachePrefetchCnt;     // in units of 2K
    uint8_t prefetchEnableBitmap;  // 8 bit bitmap, e.g. 1 0 1 0
    uint8_t prefetchOnceBitmap;    // 8 bit bitmap, e.g. 1 0 1 0
    // prefetch_once_dmu_descriptor_index in ffts;
    // num: thread0_prefetch_dmu_descriptor_index - prefetch_once_dmu_descriptor_index
    uint16_t prefetchOnceDmuNum;
    // Per-thread tables, sized for the maximum number of manual threads.
    uint16_t threadPrefetchDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM];  // max valid is threadDim
    uint16_t threadBlkDim[RT_FFTS_MAX_MANUAL_THREAD_NUM];
    const char *threadTaskFuncStub[RT_FFTS_MAX_MANUAL_THREAD_NUM];

    // dmu desc 0-64k; the length is the last valid index, threadPrefetchDmuIdx[threadDim-1]
    rtManualThreadDmuInfo_t *prefetchList;
    rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK];
} rtManualThreadAicAivInfo_t;

// One source prefetch entry for auto thread mode (element type of rtAutoThreadAicAivInfo_t.srcPrefetch).
// NOTE(review): "tail" / "non tail" presumably distinguish the last thread from the others - confirm.
typedef struct tagAutoThreadPrefetch {
    uint64_t dataAddr;  // device mem
    uint32_t dataAddrOffset;
    uint32_t nonTailDataLen;
    uint32_t tailDataLen;
} rtAutoThreadPrefetch_t;

// Auto thread mode parameters of an AIC/AIV sub task
// (the autoThreadAicAiv member of rtFftsSubTaskInfo_t.custom).
typedef struct tagAutoThreadAicAivInfo {
    uint64_t taskParamAddr;  // device mem
    uint16_t taskParamOffset;
    // Handling of FP16 computations that overflow/underflow with non-INF inputs:
    //   satMode=1: results will be +/-INF of FP16
    //   satMode=0: results will be saturated to +/-MAX of FP16
    uint8_t satMode;
    uint8_t scheduleMode;          // 0: normal mode, 1: batch mode, 2: sync mode, 3: reserved
    uint8_t iCachePrefetchCnt;     // in units of 2K
    uint8_t prefetchEnableBitmap;  // 8 bit bitmap
    uint8_t prefetchOnceBitmap;    // 8 bit bitmap

    uint16_t tailBlkDim;
    uint16_t nonTailBlkDim;

    const char *nonTailTaskFuncStub;
    const char *tailTaskFuncStub;

    // Prefetch sources. The number of valid entries is the bit count of prefetchEnableBitmap.
    // Example: prefetchEnableBitmap='00010011' needs 3 prefetches, so only srcPrefetch[0..2] are valid.
    rtAutoThreadPrefetch_t srcPrefetch[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK];
} rtAutoThreadAicAivInfo_t;

// Auto thread mode description of a ticket cache (the autoThreadCache member of rtTicketCache_t.custom).
typedef struct tagAutoThreadCacheInfo {
    uint64_t dataAddr;  // device mem
    uint32_t dataAddrOffset;
    uint32_t nonTailDataLen;
    uint32_t tailDataLen;
    uint16_t ticketCacheRefCnt;
} rtAutoThreadCacheInfo_t;

// Manual thread mode description of a ticket cache (the manualThreadCache member of rtTicketCache_t.custom).
typedef struct tagManualThreadCacheInfo {
    rtManualThreadDmuInfo_t *dmuList;  // 0-64k
    uint16_t dmuNum;
    // Per-thread tables, sized for the maximum number of manual threads.
    uint16_t sliceDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM];
    uint16_t ticketCacheRefCntTbl[RT_FFTS_MAX_MANUAL_THREAD_NUM];
} rtManualThreadCacheInfo_t;

// Cache operation selected for a ticket cache (stored in rtTicketCache_t.cacheOption).
typedef enum tagCacheOp {
    RT_CACHE_OP_NONE = 0,
    RT_CACHE_OP_FLUSH = 1,
    RT_CACHE_OP_INVALIDATE = 2,
    RT_CACHE_OP_WRITE_BACK = 3,
} rtCacheOp_t;

// One ticket cache entry (element type of rtFftsTaskInfo_t.ticketCache).
typedef struct tagTicketCache {
    rtCacheOp_t cacheOption;
    uint8_t ticketCacheWindow;
    // Mode specific part.
    // NOTE(review): this header does not state what selects the active member;
    // presumably rtFftsTaskInfo_t.fftsType - confirm.
    union {
        rtAutoThreadCacheInfo_t autoThreadCache;
        rtManualThreadCacheInfo_t manualThreadCache;
    } custom;
} rtTicketCache_t;

// Manual thread mode parameters of a NOP sub task (the manualThreadNop member of rtFftsSubTaskInfo_t.custom).
typedef struct tagManualThreadNopInfo {
    // depends on srcTickCacheVldBitmap in rtFftsSubTaskInfo_t
    rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK];
} rtManualThreadNopInfo_t;

// Description of one FFTS sub task (element type of rtFftsTaskInfo_t.subTask).
typedef struct tagFftsSubTaskInfo {
    rtFftsSubTaskType_t subTaskType;
    uint16_t threadDim;
    // Ticket cache bookkeeping: valid bitmaps plus the matching ID tables.
    // NOTE(review): presumably bit i of a *VldBitmap marks entry i of the ID table as valid - confirm.
    uint8_t dstTickCacheVldBitmap;
    uint8_t srcTickCacheVldBitmap;
    uint8_t srcDataOutOfSubGraphBitmap;
    uint8_t dstTickCacheID[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK];
    uint8_t srcTickCacheID[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK];
    // Type/mode specific part.
    // NOTE(review): the active member is presumably selected by subTaskType together with the
    // task's fftsType; this header does not spell that out - confirm.
    union {
        rtAutoThreadAicAivInfo_t autoThreadAicAiv;
        rtManualThreadAicAivInfo_t manualThreadAicAiv;
        rtManualThreadNopInfo_t manualThreadNop;
    } custom;
} rtFftsSubTaskInfo_t;

// Task-level FFTS descriptor settings (stored in rtFftsTaskInfo_t.fftsDesc). All members are single bytes.
typedef struct tagFftsDescInfo {
    uint8_t tm;             // thread subtask kickstart mode, 0: order, 1: disorder
    uint8_t di;             // discard invalidate
    uint8_t dw;             // discard write back
    uint8_t df;             // discard flush
    uint8_t dataSplitUnit;  // split source or ticket cache by 2^dataSplitUnit MB
    uint8_t prefetchOstNum;
    uint8_t cacheMaintainOstNum;
    uint8_t aicPrefetchUpper;
    uint8_t aicPrefetchLower;
    uint8_t aivPrefetchUpper;
    uint8_t aivPrefetchLower;
} rtFftsDescInfo_t;

// Complete description of one FFTS task; this is the argument type of rtFftsTaskLaunch.
// Both tables are embedded at full capacity; the count fields tell how many leading entries are in use.
typedef struct tagFftsTaskInfo {
    rtFftsType_t fftsType;
    uint16_t subTaskNum;    // real number of entries in subTask
    uint16_t tickCacheNum;  // real number of entries in ticketCache
    rtFftsDescInfo_t fftsDesc;
    // sub task descriptors
    rtFftsSubTaskInfo_t subTask[RT_FFTS_MAX_SUB_TASK_NUM];

    // ticket caches
    rtTicketCache_t ticketCache[RT_FFTS_MAX_TICKET_CACHE_NUM];
} rtFftsTaskInfo_t;

// FFTS task launch entry point.
//   fftsTaskInfo: pointer to the task description (rtFftsTaskInfo_t, declared above); taken as non-const.
//   stream:       stream handle; rtStream_t is declared outside this header.
//   return:       an rtError_t status code; the possible values are defined outside this header.
// NOTE(review): presumably submits the described task to the given stream; the implementation is not
// visible here, so ownership/lifetime requirements of fftsTaskInfo need to be confirmed.
RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif
#endif // __CCE_RUNTIME_FFTS_H

Loading…
Cancel
Save