You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

base.h 5.8 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. /*
  2. * Copyright (c) Huawei Technologies Co., Ltd. 2019-2022. All rights reserved.
  3. * Description: HCOM data type definition
  4. * Author: ligang
  5. * Create: 2019-05-24
  6. */
  7. #ifndef HCCL_BASE_H_
  8. #define HCCL_BASE_H_
  9. #include <hccl/hccl_types.h>
  10. #include <string>
  11. #ifdef __cplusplus
  12. extern "C" {
  13. #endif // __cplusplus
  14. typedef signed char s8; // short alias for signed char
  15. typedef signed short s16; // short alias for signed short
  16. typedef signed int s32; // short alias for signed int
  17. typedef signed long long s64; // short alias for signed long long
  18. typedef unsigned char u8; // short alias for unsigned char
  19. typedef unsigned short u16; // short alias for unsigned short
  20. typedef unsigned int u32; // short alias for unsigned int
  21. typedef unsigned long long u64; // short alias for unsigned long long; NOTE(review): the 8/16/32/64 suffixes assume the usual type widths, nothing here checks them
  22. /**
  23. * @brief Horovod reduction operation
  24. */
  25. typedef enum {
  26. HOROVOD_REDUCE_AVERAGE = 0, /**< average */
  27. HOROVOD_REDUCE_SUM = 1, /**< sum */
  28. HOROVOD_REDUCE_ADASUM = 2, /**< adasum */
  29. HOROVOD_REDUCE_MIN = 3, /**< min */
  30. HOROVOD_REDUCE_MAX = 4, /**< max */
  31. HOROVOD_REDUCE_PROD = 5, /**< product */
  32. HOROVOD_REDUCE_RESERVED /**< reserved; implicitly 6, one past the last listed operation */
  33. } HorovodReduceOp;
  34. const u32 HCCL_MAX_SEGMENT_NUM = 8; // The maximum number of gradient segments.
  35. /**
  36. * @brief The features of a model: its name plus per-gradient size and timing data
  37. */
  38. struct model_feature {
  39. const char *model_name; /**< The model name */
  40. u32 gradient_num; /**< The number of gradients */
  41. float *gradient_size; /**< The size of each gradient; presumably gradient_num entries -- confirm */
  42. float *gradient_time; /**< The BP computation time of each gradient; presumably gradient_num entries -- confirm */
  43. };
  44. /**
  45. * @brief Memory register address struct for remote access: one (addr, length) pair
  46. */
  47. struct MemRegisterAddr {
  48. u64 addr; // address of the registered region, stored as a 64-bit integer
  49. u64 length; // length of the region; presumably in bytes -- confirm
  50. };
  51. /**
  52. * @brief The maximum number of memory register addresses for remote access.
  53. */
  54. const u32 HCCL_MAX_MEM_REGISTER_NUM = 32;
  55. enum GradSplitForceMode { // selects whether (and how) gradient splitting is forced
  56. FORCE_NONE, /**< no force; implicitly 0 */
  57. FORCE_SIZE, /**< force split gradient by size; implicitly 1 */
  58. FORCE_RESERVED /**< reserved */
  59. };
  60. enum OriginalGraphShapeType { // shape category of the original graph (per the type name)
  61. KNOWN_SHAPE, // implicitly 0
  62. UNKNOWN_SHAPE, // implicitly 1
  63. SHAPE_RESERVED /**< reserved */
  64. };
  65. enum HcclEventType { // event kinds; values are consecutive, starting at 0
  66. HCCL_EVENT_SEND_COMPLETION = 0,
  67. HCCL_EVENT_RECV_REQUEST, // implicitly 1
  68. HCCL_EVENT_RECV_COMPLETION, // implicitly 2
  69. HCCL_EVENT_CONGESTION_RELIEF, // implicitly 3
  70. HCCL_EVENT_RESERVED /**< reserved */
  71. };
  72. const u32 TAG_MAX_LEN = 127; // maximum tag name length; the name buffer below is TAG_MAX_LEN + 1 chars
  73. using TagAttr = struct TagAttrDef { // NOTE(review): member comments were reconstructed from mis-encoded text -- confirm against the upstream header
  74. char name[TAG_MAX_LEN + 1]; // tag identifier; the extra char presumably holds the terminating NUL
  75. // Presumably: whether the caller actively calls the receive interface for this tag; 0 = no, 1 = yes (reserved, not supported yet).
  76. // Presumably: with activeRecv = 0 the caller is notified when data or a send request arrives on the receive side.
  77. uint32_t activeRecv;
  78. uint32_t sendCredit; // presumably the number of in-flight sends allowed for this tag -- confirm
  79. uint32_t eventId; // event identifier (meaning not shown in this header)
  80. };
  81. using HcclEventMsg = struct HcclEventMsgDef { // one event message: communicator, peer, tag, event type and a per-type payload
  82. HcclComm comm; // communicator handle the event belongs to
  83. u32 peerRank; // rank of the peer (per the field name)
  84. u32 tag; // tag associated with the event
  85. // 0:HCCL_EVENT_SEND_COMPLETION; 1:HCCL_EVENT_RECV_REQUEST; 2:HCCL_EVENT_RECV_COMPLETION; 3:HCCL_EVENT_CONGESTION_RELIEF
  86. u32 hcclEventType; // NOTE(review): numbering above follows enum HcclEventType; the earlier comment listed 1 and 2 swapped -- confirm
  87. union { // every alternative currently holds a single placeholder u32
  88. struct {
  89. u32 reserver; // placeholder; identifier spelling ("reserver") is part of the interface
  90. } sendCompletionItem;
  91. struct {
  92. u32 reserver; // placeholder
  93. } recvRequestItem;
  94. struct {
  95. u32 reserver; // placeholder
  96. } recvCompletionItem;
  97. struct CongestionReliefItem {
  98. u32 reserver; // placeholder
  99. } congestionReliefItem;
  100. } desc; // presumably selected by hcclEventType -- confirm
  101. };
  102. /**
  103. * @brief Stream handle, declared as an untyped pointer.
  104. */
  105. typedef void *rtStream_t;
  106. /**
  107. * @brief Model handle, declared as an untyped pointer.
  108. */
  109. typedef void *rtModel_t;
  110. struct HcomOperation { // parameters of one HCOM operation; NOTE(review): the std::string member makes this header C++-only despite the __cplusplus guard
  111. std::string hcclType; // operation type name; default-constructed to an empty string
  112. void *inputPtr{nullptr}; // input buffer; null by default
  113. void *outputPtr{nullptr}; // output buffer; null by default
  114. u64 count{0}; // count; presumably in elements of dataType -- confirm
  115. HcclDataType dataType{HCCL_DATA_TYPE_RESERVED}; // element data type; defaults to the RESERVED value
  116. HcclReduceOp opType{HCCL_REDUCE_RESERVED}; // reduction operation; defaults to the RESERVED value
  117. u32 root{0}; // root; presumably the root rank for rooted operations -- confirm
  118. };
  119. struct HcomRemoteAccessAddrInfo { // one remote-access transfer description: remote rank, both addresses and a length
  120. u32 remotetRankID; // rank id of the remote side; identifier spelling ("remotet") is part of the interface
  121. u64 remoteAddr; // host embedding table address
  122. u64 localAddr; // device HBM address
  123. u64 length; // Memory Length in Bytes
  124. };
  125. struct HcomAllToAllVParams { // all-to-all-v parameters; pointers default to null, data types to the RESERVED value
  126. void *sendbuf{nullptr}; // device mem
  127. void *sendcounts{nullptr}; // device mem; Type: uint_64 (presumably uint64_t elements -- confirm)
  128. void *sdispls{nullptr}; // device mem; Type: uint_64 (presumably send displacements -- confirm)
  129. HcclDataType sendtype{HCCL_DATA_TYPE_RESERVED}; // data type of the send buffer
  130. void *recvbuf{nullptr}; // device mem
  131. void *recvcounts{nullptr}; // device mem; Type: uint_64 (presumably uint64_t elements -- confirm)
  132. void *rdispls{nullptr}; // device mem; Type: uint_64 (presumably receive displacements -- confirm)
  133. HcclDataType recvtype{HCCL_DATA_TYPE_RESERVED}; // data type of the receive buffer
  134. const char *group{nullptr}; // not used now
  135. };
  136. struct HcomAllToAllVCParams { // all-to-all-vc parameters; pointers default to null, data types to the RESERVED value
  137. void *sendbuf{nullptr}; // device mem
  138. HcclDataType sendtype{HCCL_DATA_TYPE_RESERVED}; // data type of the send buffer
  139. void *recvbuf{nullptr}; // device mem
  140. HcclDataType recvtype{HCCL_DATA_TYPE_RESERVED}; // data type of the receive buffer
  141. void *sendcountmatrix{nullptr}; // device mem; Type: uint_64 (presumably a matrix of uint64_t send counts -- confirm)
  142. const char *group{nullptr}; // not used now
  143. };
  144. struct HcomGatherAllToAllVParams { // gather + all-to-all-v parameters; NOTE(review): unlike HcomAllToAllVParams, members here have no default initializers
  145. void *addrInfo; // device mem; contains host VA[uint_64]: [addr, length, addr, length, addr, length, ...]
  146. void *addrInfoCountPerRank; // device mem; length: ranksize; contains addrInfoCounts for every rank
  147. void *recvbuf; // device mem
  148. void *recvcounts; // device mem; Type: uint_64
  149. void *rdispls; // device mem; Type: uint_64
  150. void *gatheredbuf; // device mem
  151. s32 addrLength; // address length (unit and meaning not shown in this header)
  152. HcclDataType recvtype; // data type of the receive buffer
  153. const char *group; // not used now
  154. };
  155. typedef enum workMode { // NOTE(review): enumerator comments were reconstructed from mis-encoded text -- confirm against the upstream header
  156. HCCL_MODE_NORMAL = 0, // presumably: no "any" probe support, only exact-match probe
  157. HCCL_MODE_ANY = 1 // presumably: only probe with ANY_SOURCE + ANY_TAG is supported
  158. } WorkMode;
  159. typedef struct tagCommAttr {
  160. WorkMode mode; // presumably the probe work mode within the communicator; NOTE(review): no default initializer, unlike deviceId
  161. uint32_t deviceId = 0; // device id; defaults to 0
  162. } CommAttr;
  163. typedef void* HcclMessage; // message handle, declared as an untyped pointer
  164. typedef void* HcclRequest; // request handle, declared as an untyped pointer; see HCCL_REQUEST_NULL
  165. typedef struct { // NOTE(review): field comments were reconstructed from mis-encoded text -- confirm against the upstream header
  166. int srcRank; // presumably the rank id of the sender of the received/probed message; MPI-style field the caller may read
  167. int tag; // presumably the tag of the received/probed message; MPI-style field the caller may read
  168. int error; // presumably the receive/probe error code: 0 = no error, others = failure; MPI-style field the caller may read
  169. int cancelled; // presumably implementation-specific; callers should not rely on it
  170. int count; // presumably the received/probed payload size; implementation-specific, callers should not rely on it
  171. } HcclStatus;
  172. #define HCCL_REQUEST_NULL NULL // null value for a HcclRequest handle
  173. #define HCCL_TAG_ANY (1 << 30) // bit 30 set (0x40000000); presumably the wildcard that matches any tag -- confirm
  174. #ifdef __cplusplus
  175. }
  176. #endif // __cplusplus
  177. #endif // HCCL_BASE_H_

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示。