You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

rt_stars_define.h 3.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. /*
  2. * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
  3. * Description: the definition of stars
  4. */
  5. #ifndef CCE_RUNTIME_RT_STARS_DEFINE_H
  6. #define CCE_RUNTIME_RT_STARS_DEFINE_H
  7. #include "base.h"
  8. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  9. extern "C" {
  10. #endif
  11. #pragma pack(push)
  12. #pragma pack (1)
  13. typedef struct tagStarsSqeHeader {
  14. uint8_t type : 6;
  15. uint8_t l1Lock : 1;
  16. uint8_t l1Unlock : 1;
  17. uint8_t ie : 2;
  18. uint8_t preP : 2;
  19. uint8_t postP : 2;
  20. uint8_t wrCqe : 1;
  21. uint8_t reserved : 1;
  22. uint16_t blockDim;
  23. uint16_t rtStreamId;
  24. uint16_t taskId;
  25. } rtStarsSqeHeader_t;
  26. typedef struct tagStarsDsaSqe {
  27. // 0-7 bytes
  28. rtStarsSqeHeader_t sqeHeader;
  29. // 8-11 bytes
  30. uint32_t start : 1;
  31. uint32_t functionType : 3;
  32. uint32_t dataType : 3;
  33. uint32_t algoType : 3;
  34. uint32_t paramVldBitmap : 5;
  35. uint32_t paramAddrValBitmap : 7;
  36. uint32_t reserved0 : 10;
  37. // 12-15 bytes
  38. uint16_t sqeIndex;
  39. uint8_t kernelCredit;
  40. uint8_t reserved1;
  41. // 16-31 bytes
  42. uint32_t dsaCfgResultAddrLow;
  43. uint32_t dsaCfgResultAddrHigh;
  44. uint32_t dsaCfgStateAddrLow;
  45. uint32_t dsaCfgStateAddrHigh;
  46. // 32-47 bytes
  47. uint32_t dsaCfgParamAddrLow;
  48. uint32_t dsaCfgParamAddrHigh;
  49. uint32_t dsaCfgSeedLow;
  50. uint32_t dsaCfgSeedHigh;
  51. // 48-63 bytes
  52. uint32_t dsaCfgNumberLow;
  53. uint32_t dsaCfgNumberHigh;
  54. uint32_t reserved2[2];
  55. } rtStarsDsaSqe_t;
  56. // ffts+ type
  57. typedef enum tagFftsPlusType {
  58. RT_FFTS_PLUS_TYPE_RES1 = 2, // Reserved
  59. RT_FFTS_PLUS_TYPE_RES2 = 3, // Reserved
  60. RT_FFTS_PLUS_TYPE = 4, // FFTS+ mode
  61. } rtFftsPlusType_t;
  62. // ffts+ sqe
  63. typedef struct tagFftsPlusSqe {
  64. // 0-7 bytes
  65. rtStarsSqeHeader_t sqeHeader;
  66. // 8-11 bytes
  67. uint16_t fftsType : 3;
  68. uint16_t reserved1 : 9;
  69. uint16_t wrrRatio : 4;
  70. uint16_t reserved2;
  71. // 12-15 bytes
  72. uint16_t sqeIndex;
  73. uint8_t kernelCredit;
  74. uint8_t reserved4;
  75. // 16-23 bytes
  76. uint32_t stackPhyBaseL;
  77. uint32_t stackPhyBaseH;
  78. // 24-31 bytes
  79. uint16_t totalContextNum;
  80. uint16_t readyContextNum;
  81. uint16_t preloadContextNum;
  82. uint16_t reserved5;
  83. // 32-35 bytes
  84. uint16_t reserved6;
  85. uint16_t prefetchOstNum : 5;
  86. uint16_t reserved9 : 3;
  87. uint16_t cmaintOstNum : 5;
  88. uint16_t reserved10 : 3;
  89. // 36-39 bytes
  90. uint16_t aicPrefetchLower : 5;
  91. uint16_t reserved11 : 3;
  92. uint16_t aicPrefetchUpper : 5;
  93. uint16_t reserved12 : 3;
  94. uint16_t aivPrefetchLower : 5;
  95. uint16_t reserved13 : 3;
  96. uint16_t aivPrefetchUpper : 5;
  97. uint16_t reserved14 : 3;
  98. // 40-47 bytes
  99. uint32_t contextAddressBaseL;
  100. uint32_t contextAddressBaseH : 17;
  101. uint32_t reserved15 : 15;
  102. // 48-63 bytes
  103. uint32_t reserved16[4];
  104. } rtFftsPlusSqe_t;
  105. #pragma pack(pop)
  106. #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
  107. }
  108. #endif
  109. #endif // CCE_RUNTIME_RT_STARS_DEFINE_H

图引擎模块（GE）是 MindSpore 的一个子模块，其代码由 C++ 实现，位于前端模块 ME 和底层硬件之间，起到承接作用。图引擎模块以 ME 下发的图作为输入，进行一系列深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE 针对昇腾 AI 处理器的硬件结构特点做了特定的优化，以充分发挥昇腾 AI 处理器的强大算力。在进行模型训练/推理时，GE 会被自动调用，用户对此并无感知。GE 主要由 GE API 和 GE Core 两部分组成，详细的架构图如下所示：