You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

rt_ffts_plus_define.h 18 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747
  1. /*
  2. * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
  3. * Description: the definition of ffts plus
  4. */
  5. #ifndef CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H
  6. #define CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H
  7. #include "base.h"
  8. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  9. extern "C" {
  10. #endif
  11. #pragma pack(push)
  12. #pragma pack (1)
  13. // hardware context type
  14. typedef enum tagFftsPlusHwType {
  15. RT_HW_CTX_TYPE_AIC = 0,
  16. RT_HW_CTX_TYPE_AIV = 1,
  17. RT_HW_CTX_TYPE_NOTIFY_WAIT = 3,
  18. RT_HW_CTX_TYPE_NOTIFY_RECORD = 4,
  19. RT_HW_CTX_TYPE_WRITE_VALUE = 5,
  20. RT_HW_CTX_TYPE_MIX_AIC = 6,
  21. RT_HW_CTX_TYPE_MIX_AIV = 7,
  22. RT_HW_CTX_TYPE_SDMA = 8,
  23. RT_HW_CTX_TYPE_FLUSH_DATA = 9,
  24. RT_HW_CTX_TYPE_INVALIDATE_DATA = 10,
  25. RT_HW_CTX_TYPE_WRITEBACK_DATA = 11,
  26. RT_HW_CTX_TYPE_AICPU = 12,
  27. RT_HW_CTX_TYPE_LOAD = 13,
  28. RT_HW_CTX_TYPE_MAX = 14,
  29. } rtFftsPlusHwType_t;
  30. // hardware context type
  31. typedef enum tagFftsPlusSoftType {
  32. RT_SOFT_CTX_TYPE_COND_SWITCH = 1,
  33. RT_SOFT_CTX_TYPE_CASE_SWITCH = 2,
  34. RT_SOFT_CTX_TYPE_AT_START = 3,
  35. RT_SOFT_CTX_TYPE_AT_END = 4,
  36. RT_SOFT_CTX_TYPE_LABEL = 5,
  37. RT_SOFT_CTX_PERSISTENT_CACHE = 6,
  38. RT_SOFT_CTX_TYPE_MAX = 7,
  39. } rtFftsPlusSoftType_t;
  40. typedef enum tagFftsPlusContextType {
  41. RT_CTX_TYPE_AICORE = 0x0000,
  42. RT_CTX_TYPE_AIV = 0x0001,
  43. RT_CTX_TYPE_NOTIFY_WAIT = 0x0003,
  44. RT_CTX_TYPE_NOTIFY_RECORD = 0x0004,
  45. RT_CTX_TYPE_WRITE_VALUE = 0x0005,
  46. RT_CTX_TYPE_MIX_AIC = 0x0006,
  47. RT_CTX_TYPE_MIX_AIV = 0x0007,
  48. RT_CTX_TYPE_SDMA = 0x0008,
  49. RT_CTX_TYPE_FLUSH_DATA = 0x0009,
  50. RT_CTX_TYPE_INVALIDATE_DATA = 0x000A,
  51. RT_CTX_TYPE_WRITEBACK_DATA = 0x000B,
  52. RT_CTX_TYPE_AICPU = 0x000C,
  53. RT_CTX_TYPE_COND_SWITCH = 0x010D,
  54. RT_CTX_TYPE_CASE_SWITCH = 0x020D,
  55. RT_CTX_TYPE_AT_START = 0x0300,
  56. RT_CTX_TYPE_AT_END = 0x0400,
  57. RT_CTX_TYPE_LABEL = 0x0500,
  58. RT_CTX_TYPE_PERSISTENT_CACHE = 0x0600,
  59. }rtFftsPlusContextType_t;
  60. // condition type
  61. typedef enum tagFftsPlusCondType {
  62. RT_COND_TYPE_EQUAL = 0,
  63. RT_COND_TYPE_NOTEQUAL = 1,
  64. RT_COND_TYPE_GREATER = 2,
  65. RT_COND_TYPE_GREATER_OR_EQUAL = 3,
  66. RT_COND_TYPE_LESS = 4,
  67. RT_COND_TYPE_LESS_OR_EQUAL = 5,
  68. RT_COND_TYPE_MAX = 6,
  69. } rtFftsPlusCondType_t;
  70. // the definition of ffts plus context
  71. #define RT_CTX_SUCCESSOR_NUM 26
  72. // ffts plus common context
  73. typedef struct tagFftsPlusComCtx {
  74. // 0-3 bytes
  75. uint16_t contextType;
  76. uint8_t successorNum;
  77. uint8_t rsv1 : 7;
  78. uint8_t aten : 1;
  79. // 4-7
  80. uint8_t rsv2;
  81. uint8_t rsv3;
  82. uint8_t predCntInit;
  83. uint8_t predCnt;
  84. // 8-11
  85. uint32_t rsv4;
  86. // 12-63
  87. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  88. // 64-71
  89. uint32_t rsv5[2];
  90. // 72-75
  91. uint16_t threadId;
  92. uint16_t threadDim;
  93. // 76-127
  94. uint32_t res6[13];
  95. } rtFftsPlusComCtx_t;
  96. // aic/aiv context
  97. typedef struct tagFftsPlusAicAivCtx {
  98. // 0-3 bytes
  99. uint16_t contextType;
  100. uint8_t successorNum;
  101. uint8_t resv : 6;
  102. uint8_t dumpSwitch : 1;
  103. uint8_t aten : 1;
  104. // 4-7
  105. uint8_t prefetchConfig;
  106. uint8_t resv1;
  107. uint8_t predCntInit;
  108. uint8_t predCnt;
  109. // 8-11
  110. uint32_t resv2;
  111. // 12-63
  112. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  113. // 64-67
  114. uint16_t resv3 : 1;
  115. uint16_t schem : 2;
  116. uint16_t icachePrefetchCnt : 5;
  117. uint16_t resv4 : 7;
  118. uint16_t atm : 1;
  119. uint16_t prefetchEnableBitmap : 4;
  120. uint16_t res6 : 4;
  121. uint16_t prefetchOnceBitmap : 4;
  122. uint16_t res7 : 4;
  123. // 68-71
  124. uint16_t pmg : 2;
  125. uint16_t ns : 1;
  126. uint16_t partId : 8;
  127. uint16_t res8 : 1;
  128. uint16_t qos : 4;
  129. uint16_t res9;
  130. // 72-75
  131. uint16_t threadId;
  132. uint16_t threadDim;
  133. // 76-79
  134. uint16_t nonTailBlockdim;
  135. uint16_t tailBlockdim;
  136. // 80-83
  137. uint32_t taskParamPtrBaseL;
  138. // 84-87
  139. uint16_t taskParamPtrBaseH;
  140. uint16_t taskParamPtrOffset;
  141. // 88-95
  142. uint32_t res10;
  143. uint32_t res11;
  144. // 96-103
  145. uint32_t nonTailTaskStartPcL;
  146. uint16_t nonTailTaskStartPcH;
  147. uint16_t res12;
  148. // 104-111
  149. uint32_t tailTaskStartPcL;
  150. uint16_t tailTaskStartPcH;
  151. uint16_t res13;
  152. // 112-119
  153. uint32_t res14;
  154. uint32_t res15;
  155. // 120-127
  156. uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph)
  157. } rtFftsPlusAicAivCtx_t;
  158. // mix aic/aiv context
  159. typedef struct tagFftsPlusMixAicAivCtx {
  160. // 0-3 bytes
  161. uint16_t contextType;
  162. uint8_t successorNum;
  163. uint8_t reserved1 : 6;
  164. uint8_t dumpSwitch : 1;
  165. uint8_t aten : 1;
  166. // 4-7
  167. uint8_t prefetchConfig;
  168. uint8_t reserved2;
  169. uint8_t predCntInit;
  170. uint8_t predCnt;
  171. // 8-11
  172. uint32_t reserved3;
  173. // 12-63
  174. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  175. // 64-67
  176. uint16_t reserved4 : 1;
  177. uint16_t schem : 2;
  178. uint16_t aicIcachePrefetchCnt : 5;
  179. uint16_t aivIcachePrefetchCnt : 5;
  180. uint16_t reserved5 : 2;
  181. uint16_t atm : 1;
  182. uint16_t prefetchEnableBitmap : 4;
  183. uint16_t reserved6 : 4;
  184. uint16_t prefetchOnceBitmap : 4;
  185. uint16_t reserved7 : 4;
  186. // 68-71
  187. uint16_t pmg : 2;
  188. uint16_t ns : 1;
  189. uint16_t partId : 8;
  190. uint16_t reserved8 : 1;
  191. uint16_t qos : 4;
  192. uint8_t nonTailBlockRatioN;
  193. uint8_t tailBlockRatioN;
  194. // 72-75
  195. uint16_t threadId;
  196. uint16_t threadDim;
  197. // 76-79
  198. uint16_t nonTailBlockdim;
  199. uint16_t tailBlockdim;
  200. // 80-87
  201. uint32_t aicTaskParamPtrL;
  202. uint16_t aicTaskParamPtrH;
  203. uint16_t aicTaskParamPtrOffset;
  204. // 88-95
  205. uint32_t aivTaskParamPtrL;
  206. uint16_t aivTaskParamPtrH;
  207. uint16_t aivTaskParamPtrOffset;
  208. // 96-103
  209. uint32_t nonTailAicTaskStartPcL;
  210. uint16_t nonTailAicTaskStartPcH;
  211. uint16_t tailAicTaskStartPcH;
  212. // 104-111
  213. uint32_t tailAicTaskStartPcL;
  214. uint32_t nonTailAivTaskStartPcL;
  215. // 112-119
  216. uint16_t nonTailAivTaskStartPcH;
  217. uint16_t tailAivTaskStartPcH;
  218. uint32_t tailAivTaskStartPcL;
  219. // 120-127
  220. uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph)
  221. } rtFftsPlusMixAicAivCtx_t;
  222. // sdma context
  223. typedef struct tagFftsPlusSdmaCtx {
  224. // 0-3 bytes
  225. uint16_t contextType;
  226. uint8_t successorNum;
  227. uint8_t res1 : 6;
  228. uint8_t dumpSwitch : 1;
  229. uint8_t aten : 1;
  230. // 4-7
  231. uint8_t res2;
  232. uint8_t res3;
  233. uint8_t predCntInit;
  234. uint8_t predCnt;
  235. // 8-11
  236. uint32_t res4;
  237. // 12-63
  238. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  239. // 64-67
  240. uint8_t res5;
  241. uint8_t res6 : 7;
  242. uint8_t atm : 1;
  243. uint16_t res7;
  244. // 68-71
  245. uint16_t pmg : 2;
  246. uint16_t ns : 1;
  247. uint16_t partId : 8;
  248. uint16_t res8 : 1;
  249. uint16_t qos : 4;
  250. uint16_t res9;
  251. // 72-75
  252. uint16_t threadId;
  253. uint16_t threadDim;
  254. // 76-79
  255. uint32_t sdmaSqeHeader; // (FORMAT/MPAMNS/PARTID/DRO/SRO/QOS/DNS/SNS/DSSV/SSSV/IE/UPCODE)
  256. // 80-83
  257. uint16_t sourceStreamId;
  258. uint16_t sourceSubstreamId;
  259. // 84-87
  260. uint16_t destinationStreamId;
  261. uint16_t destinationSubstreamId;
  262. // 88-127
  263. uint32_t sourceAddressBaseL;
  264. uint32_t sourceAddressBaseH;
  265. uint32_t sourceAddressOffset;
  266. uint32_t destinationAddressBaseL;
  267. uint32_t destinationAddressBaseH;
  268. uint32_t destinationAddressOffset;
  269. uint32_t nonTailDataLength;
  270. uint32_t tailDataLength;
  271. uint32_t res10[2];
  272. } rtFftsPlusSdmaCtx_t;
  273. // ffts plus notify record/wait context
  274. typedef struct tagFftsPlusNotifyCtx {
  275. // 0-3 bytes
  276. uint16_t contextType;
  277. uint8_t successorNum;
  278. uint8_t res : 7;
  279. uint8_t aten : 1;
  280. // 4-7
  281. uint8_t res1;
  282. uint8_t res2;
  283. uint8_t predCntInit;
  284. uint8_t predCnt;
  285. // 8-11
  286. uint32_t res3;
  287. // 12-63
  288. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  289. // 64-67
  290. uint16_t res4 : 14;
  291. uint16_t satm : 1;
  292. uint16_t atm : 1;
  293. uint16_t res6;
  294. // 68-71
  295. uint32_t res7;
  296. // 72-75
  297. uint16_t threadId;
  298. uint16_t threadDim;
  299. // 76-79
  300. uint16_t notifyIdBase;
  301. uint8_t autoWindow;
  302. uint8_t res8;
  303. // 80-127
  304. uint32_t res9[4];
  305. uint16_t notifyId[16];
  306. } rtFftsPlusNotifyCtx_t;
  307. // write Value context
  308. typedef struct tagFftsPlusWriteValueCtx {
  309. // 0-3 bytes
  310. uint16_t contextType;
  311. uint8_t successorNum;
  312. uint8_t resv1 : 7;
  313. uint8_t aten : 1;
  314. // 4-7
  315. uint8_t resv2;
  316. uint8_t resv3;
  317. uint8_t predCntInit;
  318. uint8_t predCnt;
  319. // 8-11
  320. uint32_t resv4;
  321. // 12-63
  322. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  323. // 64-67
  324. uint16_t resv5 : 15;
  325. uint16_t atm : 1;
  326. uint16_t resv6;
  327. // 68-71
  328. uint32_t resv7;
  329. // 72-75
  330. uint16_t threadId;
  331. uint16_t threadDim;
  332. // 76-79
  333. uint8_t awSize : 3;
  334. uint8_t awSnoop : 1;
  335. uint8_t resv8 : 4;
  336. uint8_t awCache : 4;
  337. uint8_t awProt : 3;
  338. uint8_t awVa : 1;
  339. uint8_t arSize : 3;
  340. uint8_t arSnoop : 1;
  341. uint8_t resv9 : 4;
  342. uint8_t arCache : 4;
  343. uint8_t arProt : 3;
  344. uint8_t arVa : 1;
  345. // 80-83
  346. uint32_t writeAddressBaseL;
  347. // 84-87
  348. uint32_t writeAddressBaseH : 17;
  349. uint32_t res10 : 15;
  350. // 88-91
  351. uint32_t writeAddressOffset;
  352. // 92-95
  353. uint32_t res11;
  354. // 96-111
  355. uint32_t writeValue[4]; // write_value_00 -> write_value_03
  356. // 112-127
  357. uint32_t res12[4];
  358. } rtFftsPlusWriteValueCtx_t;
  359. // ai cpu context
  360. typedef struct tagFftsPlusAiCpuCtx {
  361. // 0-3 bytes
  362. uint16_t contextType;
  363. uint8_t successorNum;
  364. uint8_t res1 : 6;
  365. uint8_t dumpSwitch : 1;
  366. uint8_t aten : 1;
  367. // 4-7
  368. uint8_t res2;
  369. uint8_t res3;
  370. uint8_t predCntInit;
  371. uint8_t predCnt;
  372. // 8-11
  373. uint32_t res4;
  374. // 12-63
  375. uint16_t successorContextID[RT_CTX_SUCCESSOR_NUM];
  376. // 64-67
  377. uint16_t res5 : 15;
  378. uint16_t atm : 1;
  379. uint16_t res6;
  380. // 68-71
  381. uint16_t sqeIndex;
  382. uint8_t kernelType : 7;
  383. uint8_t bm : 1;
  384. uint8_t topicType : 4;
  385. uint8_t qos : 3;
  386. uint8_t res7 : 1;
  387. // 72-75
  388. uint16_t threadId;
  389. uint16_t threadDim;
  390. // 76-79
  391. uint16_t nonTailBlockdim;
  392. uint16_t tailBlockdim;
  393. // 80-115
  394. uint32_t usrData[9]; // usr_data0 -> usr_data8 usr_data2(task_param_base_l) usr_data3(task_param_base_h)
  395. // 116--119
  396. uint32_t res8;
  397. // 120-123
  398. uint32_t subtopicId : 12;
  399. uint32_t topicId : 6;
  400. uint32_t groupId : 6;
  401. uint32_t usrDataLength : 8;
  402. // 124-127
  403. uint32_t taskParamOffset;
  404. } rtFftsPlusAiCpuCtx_t;
  405. // data context
  406. typedef struct tagFftsPlusDataCtx {
  407. // 0-3 bytes
  408. uint16_t contextType;
  409. uint8_t successorNum;
  410. uint8_t res1 : 7;
  411. uint8_t aten : 1;
  412. // 4-7
  413. uint8_t res2;
  414. uint8_t res3;
  415. uint8_t cntInit; // cons_cnt_init / prod_cnt_init
  416. uint8_t cnt; // cons_cnt / prod_cnt
  417. // 8-11
  418. uint32_t res4;
  419. // 12-63
  420. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  421. // 64-67
  422. uint16_t res5 : 15;
  423. uint16_t atm : 1;
  424. uint16_t res6;
  425. // 68-71
  426. uint16_t pmg : 2;
  427. uint16_t ns : 1;
  428. uint16_t partId : 8;
  429. uint16_t res7 : 1;
  430. uint16_t qos : 4;
  431. uint16_t res8;
  432. // 72-75
  433. uint16_t threadId;
  434. uint16_t threadDim;
  435. // 76-79
  436. uint16_t origConsumerCounter;
  437. uint16_t runConsumerCounter;
  438. // 80-83
  439. uint32_t addressBaseL;
  440. // 84-87
  441. uint32_t addressBaseH;
  442. // 88-91
  443. uint32_t addressOffset;
  444. // 92-95
  445. uint32_t res9;
  446. // 96-99
  447. uint16_t nonTailNumOutter;
  448. uint16_t nonTailNumInner;
  449. // 100-103
  450. uint32_t nonTailLengthInner;
  451. // 104-107
  452. uint32_t nonTailStrideOutter;
  453. // 108-111
  454. uint32_t nonTailStrideInner;
  455. // 112-115
  456. uint16_t tailNumOutter;
  457. uint16_t tailNumInner;
  458. // 116-119
  459. uint32_t tailLengthInner;
  460. // 120-123
  461. uint32_t tailStrideOutter;
  462. // 124-127
  463. uint32_t tailStrideInner;
  464. } rtFftsPlusDataCtx_t;
  465. // at start context
  466. typedef struct tagFftsPlusAtStartCtx {
  467. // 0-3 bytes
  468. uint16_t contextType;
  469. uint8_t successorNum;
  470. uint8_t rs1 : 7;
  471. uint8_t aten : 1;
  472. // 4-7
  473. uint8_t rs2;
  474. uint8_t rs3;
  475. uint8_t predCntInit;
  476. uint8_t predCnt;
  477. // 8-11
  478. uint32_t rs4;
  479. // 12-63
  480. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  481. // 64-67
  482. uint16_t rs5;
  483. uint16_t rs6;
  484. // 68-71
  485. uint16_t rs7;
  486. uint16_t rs8;
  487. // 72-75
  488. uint16_t threadId;
  489. uint16_t threadDim;
  490. // 76-79
  491. uint16_t threadIdInit;
  492. uint16_t threadWindowSize;
  493. // 80-127
  494. uint32_t res9[12];
  495. } rtFftsPlusAtStartCtx_t;
  496. // at end context
  497. #define RT_CTX_SUCC_AT_START_SLOT_NUM 12
  498. #define RT_CTX_SUCC_OUT_LABEL_SLOT_NUM 12
  499. typedef struct tagFftsPlusAtEndCtx {
  500. // 0-3 bytes
  501. uint16_t contextType;
  502. uint8_t atStartSlotNumber;
  503. uint8_t outLabelSlotNumber : 7;
  504. uint8_t aten : 1;
  505. // 4-7
  506. uint8_t res1;
  507. uint8_t res2;
  508. uint8_t predCntInit;
  509. uint8_t predCnt;
  510. // 8-11
  511. uint32_t res3;
  512. // 12-59
  513. uint16_t succAtStartSlot[RT_CTX_SUCC_AT_START_SLOT_NUM];
  514. uint16_t succOutLabelSlot[RT_CTX_SUCC_OUT_LABEL_SLOT_NUM];
  515. // 60-63
  516. uint16_t res4;
  517. uint16_t res5;
  518. // 64-67
  519. uint16_t res6;
  520. uint16_t res7;
  521. // 68-71
  522. uint16_t res8;
  523. uint16_t res9;
  524. // 72-75
  525. uint16_t threadId;
  526. uint16_t res10;
  527. // 76-79
  528. uint16_t res11;
  529. uint16_t res12;
  530. // 80-127
  531. uint32_t res13[12];
  532. } rtFftsPlusAtEndCtx_t;
  533. // label context
  534. typedef struct tagFftsPlusLabelCtx {
  535. // 0-3 bytes
  536. uint16_t contextType;
  537. uint8_t successorNum;
  538. uint8_t res1;
  539. // 4-7
  540. uint8_t res2;
  541. uint8_t res3;
  542. uint8_t predCntInit;
  543. uint8_t predCnt;
  544. // 8-11
  545. uint32_t res4;
  546. // 12-63
  547. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  548. // 64-79
  549. uint16_t res5[8];
  550. // 80-127
  551. uint32_t res6[12];
  552. } rtFftsPlusLabelCtx_t;
  553. // case switch context
  554. typedef struct tagFftsPlusCaseSwitchCtx {
  555. // 0-3 bytes
  556. uint16_t contextType;
  557. uint8_t successorNum;
  558. uint8_t resv0 : 7;
  559. uint8_t aten : 1;
  560. // 4-7
  561. uint8_t startLabelId;
  562. uint8_t labelListLen;
  563. uint8_t predCntInit;
  564. uint8_t predCnt;
  565. // 8-11
  566. uint32_t resv1;
  567. // 12-63
  568. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  569. // 64-67
  570. uint16_t resv2 : 15;
  571. uint16_t atm : 1;
  572. uint16_t resv3;
  573. // 68-71
  574. uint32_t resv4;
  575. // 72-75
  576. uint16_t threadId;
  577. uint16_t threadDim;
  578. // 76-79
  579. uint8_t arSize : 3;
  580. uint8_t snoop : 1;
  581. uint8_t resv5 : 4;
  582. uint8_t arCache : 4;
  583. uint8_t arProt : 3;
  584. uint8_t va : 1;
  585. uint16_t resv6;
  586. // 80-83
  587. uint32_t loadAddress0BaseL;
  588. // 84-87
  589. uint32_t loadAddress0BaseH : 17;
  590. uint32_t resv7 : 14;
  591. uint32_t ld0En : 1;
  592. // 88-91
  593. uint32_t loadAddress0Offset;
  594. // 92-95
  595. uint32_t resv8;
  596. // 96-99
  597. uint32_t loadAddress1BaseL;
  598. // 100-103
  599. uint32_t loadAddress1BaseH : 17;
  600. uint32_t resv9 : 14;
  601. uint32_t ld1En : 1;
  602. // 104-107
  603. uint32_t loadAddress1Offset;
  604. // 108-127
  605. uint32_t resv10[5];
  606. } rtFftsPlusCaseSwitchCtx_t;
  607. // case default context
  608. typedef struct tagFftsPlusCaseDefCtx {
  609. // 0-3 bytes
  610. uint16_t contextType;
  611. uint8_t successorNum;
  612. uint8_t rs0 : 7;
  613. uint8_t aten : 1;
  614. // 4-7
  615. uint8_t startLabelId;
  616. uint8_t labelListLen;
  617. uint8_t predCntInit;
  618. uint8_t predCnt;
  619. // 8-11
  620. uint32_t rs1;
  621. // 12-63
  622. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  623. // 64-67
  624. uint16_t rs2;
  625. uint16_t rs3;
  626. // 68-127
  627. uint32_t rs4[15];
  628. } rtFftsPlusCaseDefCtx_t;
  629. // condition switch context
  630. #define RT_CTX_TRUE_SUCCESSOR_NUM 13
  631. #define RT_CTX_FALSE_SUCCESSOR_NUM 13
  632. typedef struct tagFftsPlusCondSwitchCtx {
  633. // 0-3 bytes
  634. uint16_t contextType;
  635. uint8_t trueSuccessorNum;
  636. uint8_t falseSuccessorNum : 7;
  637. uint8_t aten : 1;
  638. // 4-7
  639. uint8_t condition;
  640. uint8_t res1;
  641. uint8_t predCntInit;
  642. uint8_t predCnt;
  643. // 8-11
  644. uint32_t res2;
  645. // 12-63
  646. uint16_t trueSuccessorList[RT_CTX_TRUE_SUCCESSOR_NUM];
  647. uint16_t falseSuccessorList[RT_CTX_FALSE_SUCCESSOR_NUM];
  648. // 64-67
  649. uint16_t res3 : 15;
  650. uint16_t atm : 1;
  651. uint16_t res4;
  652. // 68-71
  653. uint32_t res5;
  654. // 72-75
  655. uint16_t threadId;
  656. uint16_t threadDim;
  657. // 76-79
  658. uint8_t arSize : 3;
  659. uint8_t snoop : 1;
  660. uint8_t res6 : 4;
  661. uint8_t arCache : 4;
  662. uint8_t arProt : 3;
  663. uint8_t va : 1;
  664. uint16_t res7;
  665. // 80-83
  666. uint32_t loadAddress0BaseL;
  667. // 84-87
  668. uint32_t loadAddress0BaseH : 17;
  669. uint32_t res8 : 14;
  670. uint32_t ld0En : 1;
  671. // 88-91
  672. uint32_t loadAddress0Offset;
  673. // 92-95
  674. uint32_t res9;
  675. // 96-99
  676. uint32_t loadAddress1BaseL;
  677. // 100-103
  678. uint32_t loadAddress1BaseH : 17;
  679. uint32_t res10 : 14;
  680. uint32_t ld1En : 1;
  681. // 104-107
  682. uint32_t loadAddress1Offset;
  683. // 108-127
  684. uint32_t res11[3];
  685. uint32_t cmpValue1;
  686. uint32_t cmpValue2;
  687. } rtFftsPlusCondSwitchCtx_t;
  688. // ffts plus persistent cache context
  689. typedef struct tagFftsPlusPersistentCacheCtx {
  690. // 0- 3bytes
  691. uint16_t contextType;
  692. uint8_t successorNum;
  693. uint8_t res1 : 7;
  694. uint8_t aten : 1;
  695. // 4-7
  696. uint8_t res2[2];
  697. uint8_t predCntInit;
  698. uint8_t predCnt;
  699. // 8-11
  700. uint8_t res3[4];
  701. // 12-63
  702. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  703. // 64-67
  704. uint8_t persistentEnable : 1;
  705. uint8_t res4 : 7;
  706. uint8_t res5;
  707. uint16_t persistentSize;
  708. // 68-71
  709. uint32_t persistentId;
  710. // 72-127
  711. uint32_t res6[14];
  712. } rtFftsPlusPersistentCacheCtx_t;
  713. #pragma pack(pop)
  714. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  715. }
  716. #endif
  717. #endif // CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用，用户对此并无感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示。