You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

rt_ffts_plus_define.h 17 kB


  1. /*
  2. * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
  3. * Description: the definition of ffts plus
  4. */
  5. #ifndef CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H
  6. #define CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H
  7. #include "base.h"
  8. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  9. extern "C" {
  10. #endif
  11. #pragma pack(push)
  12. #pragma pack (1)
  // Hardware context type identifiers.
  // The value 2 is not assigned in this list; RT_HW_CTX_TYPE_MAX is one past
  // the largest listed identifier (RT_HW_CTX_TYPE_LOAD).
  typedef enum tagFftsPlusHwType {
      RT_HW_CTX_TYPE_AIC = 0,
      RT_HW_CTX_TYPE_AIV = 1,
      RT_HW_CTX_TYPE_NOTIFY_WAIT = 3,
      RT_HW_CTX_TYPE_NOTIFY_RECORD = 4,
      RT_HW_CTX_TYPE_WRITE_VALUE = 5,
      RT_HW_CTX_TYPE_MIX_AIC = 6,
      RT_HW_CTX_TYPE_MIX_AIV = 7,
      RT_HW_CTX_TYPE_SDMA = 8,
      RT_HW_CTX_TYPE_FLUSH_DATA = 9,
      RT_HW_CTX_TYPE_INVALIDATE_DATA = 10,
      RT_HW_CTX_TYPE_WRITEBACK_DATA = 11,
      RT_HW_CTX_TYPE_AICPU = 12,
      RT_HW_CTX_TYPE_LOAD = 13,
      RT_HW_CTX_TYPE_MAX = 14,
  } rtFftsPlusHwType_t;
  // Software context type identifiers.
  // Numbering starts at 1; RT_SOFT_CTX_TYPE_MAX is one past the largest
  // listed identifier (RT_SOFT_CTX_TYPE_LABEL).
  typedef enum tagFftsPlusSoftType {
      RT_SOFT_CTX_TYPE_COND_SWITCH = 1,
      RT_SOFT_CTX_TYPE_CASE_SWITCH = 2,
      RT_SOFT_CTX_TYPE_AT_START = 3,
      RT_SOFT_CTX_TYPE_AT_END = 4,
      RT_SOFT_CTX_TYPE_LABEL = 5,
      RT_SOFT_CTX_TYPE_MAX = 6,
  } rtFftsPlusSoftType_t;
  // Combined 16-bit context type codes.
  // NOTE(review): numerically, the high byte of the last five entries equals
  // the matching RT_SOFT_CTX_TYPE_* value (1..5), and the low byte 0x0D of the
  // two switch entries equals RT_HW_CTX_TYPE_LOAD (13). This looks like a
  // (soft type << 8) | hw type encoding -- confirm with the hardware spec.
  typedef enum tagFftsPlusContextType {
      RT_CTX_TYPE_AICORE = 0x0000,
      RT_CTX_TYPE_AIV = 0x0001,
      RT_CTX_TYPE_NOTIFY_WAIT = 0x0003,
      RT_CTX_TYPE_NOTIFY_RECORD = 0x0004,
      RT_CTX_TYPE_WRITE_VALUE = 0x0005,
      RT_CTX_TYPE_MIX_AIC = 0x0006,
      RT_CTX_TYPE_MIX_AIV = 0x0007,
      RT_CTX_TYPE_SDMA = 0x0008,
      RT_CTX_TYPE_FLUSH_DATA = 0x0009,
      RT_CTX_TYPE_INVALIDATE_DATA = 0x000A,
      RT_CTX_TYPE_WRITEBACK_DATA = 0x000B,
      RT_CTX_TYPE_AICPU = 0x000C,
      RT_CTX_TYPE_COND_SWITCH = 0x010D,
      RT_CTX_TYPE_CASE_SWITCH = 0x020D,
      RT_CTX_TYPE_AT_START = 0x0300,
      RT_CTX_TYPE_AT_END = 0x0400,
      RT_CTX_TYPE_LABEL = 0x0500,
  } rtFftsPlusContextType_t;
  // Condition (comparison) type identifiers.
  // RT_COND_TYPE_MAX is one past the largest listed identifier.
  typedef enum tagFftsPlusCondType {
      RT_COND_TYPE_EQUAL = 0,
      RT_COND_TYPE_NOTEQUAL = 1,
      RT_COND_TYPE_GREATER = 2,
      RT_COND_TYPE_GREATER_OR_EQUAL = 3,
      RT_COND_TYPE_LESS = 4,
      RT_COND_TYPE_LESS_OR_EQUAL = 5,
      RT_COND_TYPE_MAX = 6,
  } rtFftsPlusCondType_t;
  // The definition of the ffts plus contexts.
  // Number of uint16_t entries in a context's successor list
  // (26 entries * 2 bytes fill bytes 12-63 of a context).
  #define RT_CTX_SUCCESSOR_NUM 26

  // FFTS plus common context.
  // The byte-range comments give the offset of each field group inside the
  // 128-byte context record. Fields named rsv*/res* are presumably reserved;
  // this header does not describe them.
  // NOTE(review): contextType presumably holds an rtFftsPlusContextType_t
  // value -- confirm with callers.
  typedef struct tagFftsPlusComCtx {
      // bytes 0-3
      uint16_t contextType;
      uint8_t successorNum;
      uint8_t rsv1 : 7;
      uint8_t aten : 1;
      // bytes 4-7
      uint8_t rsv2;
      uint8_t rsv3;
      uint8_t predCntInit;
      uint8_t predCnt;
      // bytes 8-11
      uint32_t rsv4;
      // bytes 12-63
      uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
      // bytes 64-71
      uint32_t rsv5[2];
      // bytes 72-75
      uint16_t threadId;
      uint16_t threadDim;
      // bytes 76-127
      uint32_t res6[13];
  } rtFftsPlusComCtx_t;
  94. // aic/aiv context
  95. typedef struct tagFftsPlusAicAivCtx {
  96. // 0-3 bytes
  97. uint16_t contextType;
  98. uint8_t successorNum;
  99. uint8_t resv : 7;
  100. uint8_t aten : 1;
  101. // 4-7
  102. uint8_t prefetchConfig;
  103. uint8_t resv1;
  104. uint8_t predCntInit;
  105. uint8_t predCnt;
  106. // 8-11
  107. uint32_t resv2;
  108. // 12-63
  109. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  110. // 64-67
  111. uint16_t resv3 : 1;
  112. uint16_t schem : 2;
  113. uint16_t icachePrefetchCnt : 5;
  114. uint16_t resv4 : 7;
  115. uint16_t atm : 1;
  116. uint16_t prefetchEnableBitmap : 4;
  117. uint16_t res6 : 4;
  118. uint16_t prefetchOnceBitmap : 4;
  119. uint16_t res7 : 4;
  120. // 68-71
  121. uint16_t pmg : 2;
  122. uint16_t ns : 1;
  123. uint16_t partId : 8;
  124. uint16_t res8 : 1;
  125. uint16_t qos : 4;
  126. uint16_t res9;
  127. // 72-75
  128. uint16_t threadId;
  129. uint16_t threadDim;
  130. // 76-79
  131. uint16_t nonTailBlockdim;
  132. uint16_t tailBlockdim;
  133. // 80-83
  134. uint32_t taskParamPtrBaseL;
  135. // 84-87
  136. uint16_t taskParamPtrBaseH;
  137. uint16_t taskParamPtrOffset;
  138. // 88-95
  139. uint32_t res10;
  140. uint32_t res11;
  141. // 96-103
  142. uint32_t nonTailTaskStartPcL;
  143. uint16_t nonTailTaskStartPcH;
  144. uint16_t res12;
  145. // 104-111
  146. uint32_t tailTaskStartPcL;
  147. uint16_t tailTaskStartPcH;
  148. uint16_t res13;
  149. // 112-119
  150. uint32_t res14;
  151. uint32_t res15;
  152. // 120-127
  153. uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph)
  154. } rtFftsPlusAicAivCtx_t;
  155. // mix aic/aiv context
  156. typedef struct tagFftsPlusMixAicAivCtx {
  157. // 0-3 bytes
  158. uint16_t contextType;
  159. uint8_t successorNum;
  160. uint8_t reserved1 : 7;
  161. uint8_t aten : 1;
  162. // 4-7
  163. uint8_t prefetchConfig;
  164. uint8_t reserved2;
  165. uint8_t predCntInit;
  166. uint8_t predCnt;
  167. // 8-11
  168. uint32_t reserved3;
  169. // 12-63
  170. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  171. // 64-67
  172. uint16_t reserved4 : 1;
  173. uint16_t schem : 2;
  174. uint16_t aicIcachePrefetchCnt : 5;
  175. uint16_t aivIcachePrefetchCnt : 5;
  176. uint16_t reserved5 : 2;
  177. uint16_t atm : 1;
  178. uint16_t prefetchEnableBitmap : 4;
  179. uint16_t reserved6 : 4;
  180. uint16_t prefetchOnceBitmap : 4;
  181. uint16_t reserved7 : 4;
  182. // 68-71
  183. uint16_t pmg : 2;
  184. uint16_t ns : 1;
  185. uint16_t partId : 8;
  186. uint16_t reserved8 : 1;
  187. uint16_t qos : 4;
  188. uint8_t nonTailBlockRatioN;
  189. uint8_t tailBlockRatioN;
  190. // 72-75
  191. uint16_t threadId;
  192. uint16_t threadDim;
  193. // 76-79
  194. uint16_t nonTailBlockdim;
  195. uint16_t tailBlockdim;
  196. // 80-87
  197. uint32_t aicTaskParamPtrL;
  198. uint16_t aicTaskParamPtrH;
  199. uint16_t aicTaskParamPtrOffset;
  200. // 88-95
  201. uint32_t aivTaskParamPtrL;
  202. uint16_t aivTaskParamPtrH;
  203. uint16_t aivTaskParamPtrOffset;
  204. // 96-103
  205. uint32_t nonTailAicTaskStartPcL;
  206. uint16_t nonTailAicTaskStartPcH;
  207. uint16_t tailAicTaskStartPcH;
  208. // 104-111
  209. uint32_t tailAicTaskStartPcL;
  210. uint32_t nonTailAivTaskStartPcL;
  211. // 112-119
  212. uint16_t nonTailAivTaskStartPcH;
  213. uint16_t tailAivTaskStartPcH;
  214. uint32_t tailAivTaskStartPcL;
  215. // 120-127
  216. uint16_t srcSlot[4]; // src_slot0-3(context ID for source data which is out of subgraph)
  217. } rtFftsPlusMixAicAivCtx_t;
  218. // sdma context
  219. typedef struct tagFftsPlusSdmaCtx {
  220. // 0-3 bytes
  221. uint16_t contextType;
  222. uint8_t successorNum;
  223. uint8_t res1 : 7;
  224. uint8_t aten : 1;
  225. // 4-7
  226. uint8_t res2;
  227. uint8_t res3;
  228. uint8_t predCntInit;
  229. uint8_t predCnt;
  230. // 8-11
  231. uint32_t res4;
  232. // 12-63
  233. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  234. // 64-67
  235. uint8_t res5;
  236. uint8_t res6 : 7;
  237. uint8_t atm : 1;
  238. uint16_t res7;
  239. // 68-71
  240. uint16_t pmg : 2;
  241. uint16_t ns : 1;
  242. uint16_t partId : 8;
  243. uint16_t res8 : 1;
  244. uint16_t qos : 4;
  245. uint16_t res9;
  246. // 72-75
  247. uint16_t threadId;
  248. uint16_t threadDim;
  249. // 76-79
  250. uint32_t sdmaSqeHeader; // (FORMAT/MPAMNS/PARTID/DRO/SRO/QOS/DNS/SNS/DSSV/SSSV/IE/UPCODE)
  251. // 80-83
  252. uint16_t sourceStreamId;
  253. uint16_t sourceSubstreamId;
  254. // 84-87
  255. uint16_t destinationStreamId;
  256. uint16_t destinationSubstreamId;
  257. // 88-127
  258. uint32_t sourceAddressBaseL;
  259. uint32_t sourceAddressBaseH;
  260. uint32_t sourceAddressOffset;
  261. uint32_t destinationAddressBaseL;
  262. uint32_t destinationAddressBaseH;
  263. uint32_t destinationAddressOffset;
  264. uint32_t nonTailDataLength;
  265. uint32_t tailDataLength;
  266. uint32_t res10[2];
  267. } rtFftsPlusSdmaCtx_t;
  268. // ffts plus notify record/wait context
  269. typedef struct tagFftsPlusNotifyCtx {
  270. // 0-3 bytes
  271. uint16_t contextType;
  272. uint8_t successorNum;
  273. uint8_t res : 7;
  274. uint8_t aten : 1;
  275. // 4-7
  276. uint8_t res1;
  277. uint8_t res2;
  278. uint8_t predCntInit;
  279. uint8_t predCnt;
  280. // 8-11
  281. uint32_t res3;
  282. // 12-63
  283. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  284. // 64-67
  285. uint16_t res4 : 14;
  286. uint16_t satm : 1;
  287. uint16_t atm : 1;
  288. uint16_t res6;
  289. // 68-71
  290. uint32_t res7;
  291. // 72-75
  292. uint16_t threadId;
  293. uint16_t threadDim;
  294. // 76-79
  295. uint16_t notifyIdBase;
  296. uint8_t autoWindow;
  297. uint8_t res8;
  298. // 80-127
  299. uint32_t res9[4];
  300. uint16_t notifyId[16];
  301. } rtFftsPlusNotifyCtx_t;
  302. // write Value context
  303. typedef struct tagFftsPlusWriteValueCtx {
  304. // 0-3 bytes
  305. uint16_t contextType;
  306. uint8_t successorNum;
  307. uint8_t resv1 : 7;
  308. uint8_t aten : 1;
  309. // 4-7
  310. uint8_t resv2;
  311. uint8_t resv3;
  312. uint8_t predCntInit;
  313. uint8_t predCnt;
  314. // 8-11
  315. uint32_t resv4;
  316. // 12-63
  317. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  318. // 64-67
  319. uint16_t resv5 : 15;
  320. uint16_t atm : 1;
  321. uint16_t resv6;
  322. // 68-71
  323. uint32_t resv7;
  324. // 72-75
  325. uint16_t threadId;
  326. uint16_t threadDim;
  327. // 76-79
  328. uint8_t awSize : 3;
  329. uint8_t awSnoop : 1;
  330. uint8_t resv8 : 4;
  331. uint8_t awCache : 4;
  332. uint8_t awProt : 3;
  333. uint8_t awVa : 1;
  334. uint8_t arSize : 3;
  335. uint8_t arSnoop : 1;
  336. uint8_t resv9 : 4;
  337. uint8_t arCache : 4;
  338. uint8_t arProt : 3;
  339. uint8_t arVa : 1;
  340. // 80-83
  341. uint32_t writeAddressBaseL;
  342. // 84-87
  343. uint32_t writeAddressBaseH: 17;
  344. uint32_t res10: 15;
  345. // 88-91
  346. uint32_t writeAddressOffset;
  347. // 92-95
  348. uint32_t res11;
  349. // 96-111
  350. uint32_t writeValue[4]; // write_value_00 -> write_value_03
  351. // 112-127
  352. uint32_t res12[4];
  353. } rtFftsPlusWriteValueCtx_t;
  354. // ai cpu context
  355. typedef struct tagFftsPlusAiCpuCtx {
  356. // 0-3 bytes
  357. uint16_t contextType;
  358. uint8_t successorNum;
  359. uint8_t res1 : 7;
  360. uint8_t aten : 1;
  361. // 4-7
  362. uint8_t res2;
  363. uint8_t res3;
  364. uint8_t predCntInit;
  365. uint8_t predCnt;
  366. // 8-11
  367. uint32_t res4;
  368. // 12-63
  369. uint16_t successorContextID[RT_CTX_SUCCESSOR_NUM];
  370. // 64-67
  371. uint16_t res5 : 15;
  372. uint16_t atm : 1;
  373. uint16_t res6;
  374. // 68-71
  375. uint16_t sqeIndex;
  376. uint8_t kernelType : 7;
  377. uint8_t bm : 1;
  378. uint8_t topicType : 4;
  379. uint8_t qos : 3;
  380. uint8_t res7 : 1;
  381. // 72-75
  382. uint16_t threadId;
  383. uint16_t threadDim;
  384. // 76-79
  385. uint16_t nonTailBlockdim;
  386. uint16_t tailBlockdim;
  387. // 80-115
  388. uint32_t usrData[9]; // usr_data0 -> usr_data8 usr_data2(task_param_base_l) usr_data3(task_param_base_h)
  389. // 116--119
  390. uint32_t res8;
  391. // 120-123
  392. uint32_t subtopicId : 12;
  393. uint32_t topicId : 6;
  394. uint32_t groupId : 6;
  395. uint32_t usrDataLength : 8;
  396. // 124-127
  397. uint32_t taskParamOffset;
  398. } rtFftsPlusAiCpuCtx_t;
  399. // data context
  400. typedef struct tagFftsPlusDataCtx {
  401. // 0-3 bytes
  402. uint16_t contextType;
  403. uint8_t successorNum;
  404. uint8_t res1 : 7;
  405. uint8_t aten : 1;
  406. // 4-7
  407. uint8_t res2;
  408. uint8_t res3;
  409. uint8_t cntInit; // cons_cnt_init / prod_cnt_init
  410. uint8_t cnt; // cons_cnt / prod_cnt
  411. // 8-11
  412. uint32_t res4;
  413. // 12-63
  414. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  415. // 64-67
  416. uint16_t res5 : 15;
  417. uint16_t atm : 1;
  418. uint16_t res6;
  419. // 68-71
  420. uint16_t pmg : 2;
  421. uint16_t ns : 1;
  422. uint16_t partId : 8;
  423. uint16_t res7 : 1;
  424. uint16_t qos : 4;
  425. uint16_t res8;
  426. // 72-75
  427. uint16_t threadId;
  428. uint16_t threadDim;
  429. // 76-79
  430. uint16_t origConsumerCounter;
  431. uint16_t runConsumerCounter;
  432. // 80-83
  433. uint32_t addressBaseL;
  434. // 84-87
  435. uint32_t addressBaseH;
  436. // 88-91
  437. uint32_t addressOffset;
  438. // 92-95
  439. uint32_t res9;
  440. // 96-99
  441. uint16_t nonTailNumOutter;
  442. uint16_t nonTailNumInner;
  443. // 100-103
  444. uint32_t nonTailLengthInner;
  445. // 104-107
  446. uint32_t nonTailStrideOutter;
  447. // 108-111
  448. uint32_t nonTailStrideInner;
  449. // 112-115
  450. uint16_t tailNumOutter;
  451. uint16_t tailNumInner;
  452. // 116-119
  453. uint32_t tailLengthInner;
  454. // 120-123
  455. uint32_t tailStrideOutter;
  456. // 124-127
  457. uint32_t tailStrideInner;
  458. } rtFftsPlusDataCtx_t;
  459. // at start context
  460. typedef struct tagFftsPlusAtStartCtx {
  461. // 0-3 bytes
  462. uint16_t contextType;
  463. uint8_t successorNum;
  464. uint8_t rs1 : 7;
  465. uint8_t aten : 1;
  466. // 4-7
  467. uint8_t rs2;
  468. uint8_t rs3;
  469. uint8_t predCntInit;
  470. uint8_t predCnt;
  471. // 8-11
  472. uint32_t rs4;
  473. // 12-63
  474. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  475. // 64-67
  476. uint16_t rs5;
  477. uint16_t rs6;
  478. // 68-71
  479. uint16_t rs7;
  480. uint16_t rs8;
  481. // 72-75
  482. uint16_t threadId;
  483. uint16_t threadDim;
  484. // 76-79
  485. uint16_t threadIdInit;
  486. uint16_t threadWindowSize;
  487. // 80-127
  488. uint32_t res9[12];
  489. } rtFftsPlusAtStartCtx_t;
  // At-end context.
  // Slot counts of the two uint16_t slot arrays; together they fill bytes 12-59
  // (12 entries * 2 bytes each, twice).
  #define RT_CTX_SUCC_AT_START_SLOT_NUM 12
  #define RT_CTX_SUCC_OUT_LABEL_SLOT_NUM 12

  // The byte-range comments give the offset of each field group inside the
  // 128-byte context record. Fields named res* are presumably reserved;
  // this header does not describe them.
  // Bytes 2-3 hold atStartSlotNumber / outLabelSlotNumber here, in the place
  // where the other contexts in this header hold successorNum.
  typedef struct tagFftsPlusAtEndCtx {
      // bytes 0-3
      uint16_t contextType;
      uint8_t atStartSlotNumber;
      uint8_t outLabelSlotNumber : 7;
      uint8_t aten : 1;
      // bytes 4-7
      uint8_t res1;
      uint8_t res2;
      uint8_t predCntInit;
      uint8_t predCnt;
      // bytes 8-11
      uint32_t res3;
      // bytes 12-59
      uint16_t succAtStartSlot[RT_CTX_SUCC_AT_START_SLOT_NUM];
      uint16_t succOutLabelSlot[RT_CTX_SUCC_OUT_LABEL_SLOT_NUM];
      // bytes 60-63
      uint16_t res4;
      uint16_t res5;
      // bytes 64-67
      uint16_t res6;
      uint16_t res7;
      // bytes 68-71
      uint16_t res8;
      uint16_t res9;
      // bytes 72-75
      uint16_t threadId;
      uint16_t res10;
      // bytes 76-79
      uint16_t res11;
      uint16_t res12;
      // bytes 80-127
      uint32_t res13[12];
  } rtFftsPlusAtEndCtx_t;
  527. // label context
  528. typedef struct tagFftsPlusLabelCtx {
  529. // 0-3 bytes
  530. uint16_t contextType;
  531. uint8_t successorNum;
  532. uint8_t res1;
  533. // 4-7
  534. uint8_t res2;
  535. uint8_t res3;
  536. uint8_t predCntInit;
  537. uint8_t predCnt;
  538. // 8-11
  539. uint32_t res4;
  540. // 12-63
  541. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  542. // 64-79
  543. uint16_t res5[8];
  544. // 80-127
  545. uint32_t res6[12];
  546. } rtFftsPlusLabelCtx_t;
  547. // case switch context
  548. typedef struct tagFftsPlusCaseSwitchCtx {
  549. // 0-3 bytes
  550. uint16_t contextType;
  551. uint8_t successorNum;
  552. uint8_t resv0 : 7;
  553. uint8_t aten : 1;
  554. // 4-7
  555. uint8_t startLabelId;
  556. uint8_t labelListLen;
  557. uint8_t predCntInit;
  558. uint8_t predCnt;
  559. // 8-11
  560. uint32_t resv1;
  561. // 12-63
  562. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  563. // 64-67
  564. uint16_t resv2 : 15;
  565. uint16_t atm : 1;
  566. uint16_t resv3;
  567. // 68-71
  568. uint32_t resv4;
  569. // 72-75
  570. uint16_t threadId;
  571. uint16_t threadDim;
  572. // 76-79
  573. uint8_t arSize : 3;
  574. uint8_t snoop : 1;
  575. uint8_t resv5 : 4;
  576. uint8_t arCache : 4;
  577. uint8_t arProt : 3;
  578. uint8_t va : 1;
  579. uint16_t resv6;
  580. // 80-83
  581. uint32_t loadAddress0BaseL;
  582. // 84-87
  583. uint32_t loadAddress0BaseH : 17;
  584. uint32_t resv7 : 14;
  585. uint32_t ld0En : 1;
  586. // 88-91
  587. uint32_t loadAddress0Offset;
  588. // 92-95
  589. uint32_t resv8;
  590. // 96-99
  591. uint32_t loadAddress1BaseL;
  592. // 100-103
  593. uint32_t loadAddress1BaseH : 17;
  594. uint32_t resv9 : 14;
  595. uint32_t ld1En : 1;
  596. // 104-107
  597. uint32_t loadAddress1Offset;
  598. // 108-127
  599. uint32_t resv10[5];
  600. } rtFftsPlusCaseSwitchCtx_t;
  601. // case default context
  602. typedef struct tagFftsPlusCaseDefCtx {
  603. // 0-3 bytes
  604. uint16_t contextType;
  605. uint8_t successorNum;
  606. uint8_t rs0 : 7;
  607. uint8_t aten : 1;
  608. // 4-7
  609. uint8_t startLabelId;
  610. uint8_t labelListLen;
  611. uint8_t predCntInit;
  612. uint8_t predCnt;
  613. // 8-11
  614. uint32_t rs1;
  615. // 12-63
  616. uint16_t successorList[RT_CTX_SUCCESSOR_NUM];
  617. // 64-67
  618. uint16_t rs2;
  619. uint16_t rs3;
  620. // 68-127
  621. uint32_t rs4[15];
  622. } rtFftsPlusCaseDefCtx_t;
  // Condition switch context.
  // Entry counts of the true/false successor lists; together the two uint16_t
  // arrays fill bytes 12-63 (13 entries * 2 bytes each, twice).
  #define RT_CTX_TRUE_SUCCESSOR_NUM 13
  #define RT_CTX_FALSE_SUCCESSOR_NUM 13

  // The byte-range comments give the offset of each field group inside the
  // 128-byte context record. Fields named res* are presumably reserved;
  // this header does not describe them.
  // NOTE(review): `condition` presumably holds an rtFftsPlusCondType_t value
  // -- confirm with callers.
  typedef struct tagFftsPlusCondSwitchCtx {
      // bytes 0-3
      uint16_t contextType;
      uint8_t trueSuccessorNum;
      uint8_t falseSuccessorNum : 7;
      uint8_t aten : 1;
      // bytes 4-7
      uint8_t condition;
      uint8_t res1;
      uint8_t predCntInit;
      uint8_t predCnt;
      // bytes 8-11
      uint32_t res2;
      // bytes 12-63
      uint16_t trueSuccessorList[RT_CTX_TRUE_SUCCESSOR_NUM];
      uint16_t falseSuccessorList[RT_CTX_FALSE_SUCCESSOR_NUM];
      // bytes 64-67
      uint16_t res3 : 15;
      uint16_t atm : 1;
      uint16_t res4;
      // bytes 68-71
      uint32_t res5;
      // bytes 72-75
      uint16_t threadId;
      uint16_t threadDim;
      // bytes 76-79
      uint8_t arSize : 3;
      uint8_t snoop : 1;
      uint8_t res6 : 4;
      uint8_t arCache : 4;
      uint8_t arProt : 3;
      uint8_t va : 1;
      uint16_t res7;
      // bytes 80-83
      uint32_t loadAddress0BaseL;
      // bytes 84-87
      uint32_t loadAddress0BaseH : 17;
      uint32_t res8 : 14;
      uint32_t ld0En : 1;
      // bytes 88-91
      uint32_t loadAddress0Offset;
      // bytes 92-95
      uint32_t res9;
      // bytes 96-99
      uint32_t loadAddress1BaseL;
      // bytes 100-103
      uint32_t loadAddress1BaseH : 17;
      uint32_t res10 : 14;
      uint32_t ld1En : 1;
      // bytes 104-107
      uint32_t loadAddress1Offset;
      // bytes 108-127 (12 reserved bytes followed by the two compare values)
      uint32_t res11[3];
      uint32_t cmpValue1;
      uint32_t cmpValue2;
  } rtFftsPlusCondSwitchCtx_t;
  682. #pragma pack(pop)
  683. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  684. }
  685. #endif
  686. #endif // CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示。