
opr_param_defs.fbs 61 kB

// generated by gen_param_defs.py for 53ca6252b5b9568f67b9767fb4fd0d2ef6b717b28a861692e9105d5f796a9472
include "dtype.fbs";
namespace mgb.serialization.fbs.param;
enum ArgsortOrder : uint {
    ASCENDING = 0,
    DESCENDING = 1,
}
enum BNFwdMode : uint {
    /// Training phase.
    TRAINING = 0,
    /// Inference phase.
    INFERENCE = 1,
}
enum BNParamDim : uint {
    /// Dim of params (Sigma, Mu) is 1 x 1 x H x W
    DIM_11HW = 0,
    /// Dim of params (Sigma, Mu) is 1 x C x H x W
    DIM_1CHW = 1,
    /// Dim of params (Sigma, Mu) is 1 x C x 1 x 1
    DIM_1C11 = 2,
    /// Dim of params (Sigma, Mu) is 1 x 1 x 1 x C
    DIM_111C = 3,
}
enum CondTakeMode : uint {
    /// take if ``abs(data-val)<eps``
    EQ = 0,
    /// take if ``abs(data-val)>=eps``
    NEQ = 1,
    /// take if ``data<val``
    LT = 2,
    /// take if ``data<=val``
    LEQ = 3,
    /// take if ``data>val``
    GT = 4,
    /// take if ``data>=val``
    GEQ = 5,
}
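// Editor's sketch (not part of the generated schema): the CondTake mode
// semantics above, written out with numpy. The helper name `cond_take` and
// its defaults are assumptions for illustration only.
//
//     import numpy as np
//
//     def cond_take(data, val, mode="EQ", eps=1e-6):
//         masks = {
//             "EQ":  np.abs(data - val) < eps,
//             "NEQ": np.abs(data - val) >= eps,
//             "LT":  data < val,
//             "LEQ": data <= val,
//             "GT":  data > val,
//             "GEQ": data >= val,
//         }
//         m = masks[mode]
//         # the op returns the taken values together with their flat indices
//         return data[m], np.flatnonzero(m)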
enum Conv3DBiasNonlineMode : uint {
    IDENTITY = 0,
    RELU = 1,
    SIGMOID = 2,
}
enum ConvBiasV0NonlineMode : uint {
    IDENTITY = 0,
    RELU = 1,
    SIGMOID = 2,
    H_SWISH = 3,
}
enum ConvPoolingMethod : uint {
    WITH_TEXTURE_OBJ = 0,
    WITH_SHARED_MEM = 1,
}
enum ConvPoolingNonlineMode : uint {
    IDENTITY = 0,
    RELU = 1,
    SIGMOID = 2,
}
enum ConvPoolingPoolMode : uint {
    AVERAGE = 0,
    MAX_ = 1,
}
/// convolution data/filter/output format; see :class:`RelayoutFormat` for more
/// details
enum ConvolutionFormat : uint {
    NCHW = 0,
    NHWC = 1,
    NHWCD4 = 2,
    NCHW4 = 3,
    NCHW8 = 4,
    NCHW32 = 5,
    NCHW88 = 6,
    NCHW44 = 7,
    NCHW44_DOT = 8,
    /// NCHW4_NCHW32 means input tensors are nchw4 layout, output tensor is
    /// nchw32 layout
    NCHW4_NCHW32 = 9,
    /// NCHW32_NCHW4 means input tensors are nchw32 layout, output tensor is
    /// nchw4 layout
    NCHW32_NCHW4 = 10,
    /// NCHW4_NCHW means input tensors are nchw4 layout, output tensor is nchw
    /// layout
    NCHW4_NCHW = 11,
    /// NHWC_NCHW means input tensors are nhwc layout, output tensor is nchw
    /// layout
    NHWC_NCHW = 12,
    /// NHWC_NCHW4_IC_SMALL means input tensors are nhwc (c < 4) layout, output
    /// tensor is nchw4 layout, padding c=4
    NHWC_NCHW4_IC_SMALL = 13,
    /// NCHW_NCHW4_IC_SMALL means input tensors are nchw (c < 4) layout, output
    /// tensor is nchw4 layout, padding c=4
    NCHW_NCHW4_IC_SMALL = 14,
    /// CHWN4 is currently only used on the Nvidia platform for fast
    /// implementation of convolution using CUDA/SASS. The channels are split
    /// into groups of 4 channels.
    CHWN4 = 15,
    /// NCHW64 is designed for convolution implementations utilizing TensorCore
    /// instructions for 4-bit integers on Nvidia platforms
    NCHW64 = 16,
    /// NCHW4_NHWC means input tensors are nchw4 layout, output tensor is nhwc
    /// layout
    NCHW4_NHWC = 17,
}
enum Convolution3DDataType : uint {
    /// input/output both float32/float16
    FLOAT = 0,
    /// input/output both float16, the internal compute is float32
    FLOAT_IO16xC32 = 1,
}
enum Convolution3DFormat : uint {
    NCDHW = 0,
    NDHWC = 1,
}
enum Convolution3DMode : uint {
    CROSS_CORRELATION = 0,
    CONVOLUTION = 1,
}
enum Convolution3DSparse : uint {
    /// dense convolution: filter shape should be [oc, ic, spatial...] if
    /// format is NCDHW, [oc, spatial..., ic] if format is NDHWC
    DENSE = 0,
    /// group convolution: filter shape should be [group, oc_per_group,
    /// ic_per_group, spatial...] if format is NCDHW, [group, oc_per_group,
    /// spatial..., ic_per_group] if format is NDHWC
    GROUP = 1,
}
enum ConvolutionV0DataType : uint {
    /// input/output both float32/float16
    FLOAT = 0,
    INT8x8x16 = 1,
    INT8x8x32 = 2,
    /// input/output both float16, the internal compute is float32
    FLOAT_IO16xC32 = 3,
    /// input QuantizedAsymm8, output QuantizedS32
    QUINT8x8x32 = 4,
    /// input int8, output specified by tensor DType
    INT8x8xX = 5,
    /// input QuantizedAsymm4, output QuantizedS32
    QUINT4x4x32 = 6,
}
/// convolution data/filter/output format; see :class:`RelayoutFormat` for more
/// details
enum ConvolutionV0Format : uint {
    NCHW = 0,
    NHWC = 1,
    NHWCD4 = 2,
    NCHW4 = 3,
    NCHW8 = 4,
    NCHW32 = 5,
    NCHW88 = 6,
    NCHW44 = 7,
    NCHW44_DOT = 8,
    /// NCHW layout with weights transformed by winograd
    NCHW_WINOGRAD = 9,
    /// NCHW88 layout with weights transformed by winograd
    NCHW88_WINOGRAD = 10,
    /// NCHW44 layout with weights transformed by winograd
    NCHW44_WINOGRAD = 11,
    /// NCHW4_NCHW32 means input tensors are nchw4 layout, output tensor is
    /// nchw32 layout
    NCHW4_NCHW32 = 12,
    /// NCHW32_NCHW4 means input tensors are nchw32 layout, output tensor is
    /// nchw4 layout
    NCHW32_NCHW4 = 13,
    /// NCHW4_NCHW means input tensors are nchw4 layout, output tensor is nchw
    /// layout
    NCHW4_NCHW = 14,
    /// NHWC_NCHW means input tensors are nhwc layout, output tensor is nchw
    /// layout
    NHWC_NCHW = 15,
    /// NHWC_NCHW4_IC_SMALL means input tensors are nhwc (c < 4) layout, output
    /// tensor is nchw4 layout, padding c=4
    NHWC_NCHW4_IC_SMALL = 16,
    /// NCHW_NCHW4_IC_SMALL means input tensors are nchw (c < 4) layout, output
    /// tensor is nchw4 layout, padding c=4
    NCHW_NCHW4_IC_SMALL = 17,
    /// CHWN4 is currently only used on the Nvidia platform for fast
    /// implementation of convolution using CUDA/SASS. The channels are split
    /// into groups of 4 channels.
    CHWN4 = 18,
    /// NCHW4_NHWC means input tensors are nchw4 layout, output tensor is nhwc
    /// layout
    NCHW4_NHWC = 19,
}
enum ConvolutionV0Mode : uint {
    CROSS_CORRELATION = 0,
    CONVOLUTION = 1,
}
enum ConvolutionV0Sparse : uint {
    /// dense convolution: filter shape should be [oc, ic, spatial...] if
    /// format is NCHW, [oc, spatial..., ic] if format is NHWC
    DENSE = 0,
    /// group convolution: filter shape should be [group, oc_per_group,
    /// ic_per_group, spatial...] if format is NCHW, [group, oc_per_group,
    /// spatial..., ic_per_group] if format is NHWC
    GROUP = 1,
}
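// Editor's sketch (not part of the generated schema): the DENSE vs GROUP
// filter shapes documented above, checked with numpy for NCHW format; all
// sizes below are made-up examples.
//
//     import numpy as np
//
//     oc, ic, fh, fw, group = 8, 4, 3, 3, 2
//     dense_filter = np.zeros((oc, ic, fh, fw))  # DENSE: [oc, ic, fh, fw]
//     group_filter = np.zeros((group, oc // group, ic // group, fh, fw))
//     # GROUP: [group, oc_per_group, ic_per_group, fh, fw]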
/// Specifies special computation modes, e.g. different combinations of
/// intermediate result data types.
enum ConvolutionV1ComputeMode : uint {
    /// No special requirements on the precision of intermediate results.
    DEFAULT = 0,
    /// Use Float32 accumulator and intermediate result. Only supported when
    /// input and output are Float16.
    FLOAT32 = 1,
}
enum CvtColorMode : uint {
    RGB2GRAY = 0,
    RGB2YUV = 1,
    YUV2RGB = 2,
    GRAY2RGB = 3,
    RGBA2RGB = 4,
    RGBA2BGR = 5,
    RGBA2GRAY = 6,
    RGB2BGR = 7,
    BGR2GRAY = 8,
    BGR2RGB = 9,
    /// For historical reasons, referred to as YCC by opencv
    YUV2GRAY_NV21 = 10,
    YUV2RGB_NV21 = 11,
    YUV2BGR_NV21 = 12,
    YUV2GRAY_NV12 = 13,
    YUV2RGB_NV12 = 14,
    YUV2BGR_NV12 = 15,
    YUV2GRAY_YV12 = 16,
    YUV2RGB_YV12 = 17,
    YUV2BGR_YV12 = 18,
    YUV2GRAY_YU12 = 19,
    YUV2RGB_YU12 = 20,
    YUV2BGR_YU12 = 21,
    YCrCb2RGB = 22,
    YCrCb2BGR = 23,
    /// BT601 yuv format, referred to as YUV by opencv
    BT601_YUV2RGB_NV21 = 24,
    BT601_YUV2BGR_NV21 = 25,
    BT601_YUV2RGB_NV12 = 26,
    BT601_YUV2BGR_NV12 = 27,
    BT601_YUV2RGB_YV12 = 28,
    BT601_YUV2BGR_YV12 = 29,
    BT601_YUV2RGB_YU12 = 30,
    BT601_YUV2BGR_YU12 = 31,
}
enum DctChannelSelectV0FastImpl : uint {
    NONE = 0,
    FIX_32_MASK = 1,
}
enum ElemwiseMode : uint {
    /// unary: max(x, 0)
    RELU = 0,
    /// unary: abs(x)
    ABS = 1,
    /// unary: acos(x)
    ACOS = 2,
    /// unary: asin(x)
    ASIN = 3,
    /// unary: ceil(x)
    CEIL = 4,
    /// unary: cos(x)
    COS = 5,
    /// unary: exp(x)
    EXP = 6,
    /// unary: numerically stable exp(x)-1
    EXPM1 = 7,
    /// unary: floor(x)
    FLOOR = 8,
    /// unary: natural logarithm, log(x)
    LOG = 9,
    /// unary: numerically stable log(x+1)
    LOG1P = 10,
    /// unary: -x
    NEGATE = 11,
    /// unary: 1/(1+exp(-x))
    SIGMOID = 12,
    /// unary: sin(x)
    SIN = 13,
    /// unary: tanh(x)
    TANH = 14,
    /// binary: x > 0 ? y : -y
    ABS_GRAD = 15,
    /// binary: x + y
    ADD = 16,
    /// binary: floor(x / y)
    FLOOR_DIV = 17,
    /// binary: max(x, y)
    MAX_ = 18,
    /// binary: min(x, y)
    MIN_ = 19,
    /// binary: x % y or fmodf(x, y)
    MOD = 20,
    /// binary: x * y
    MUL = 21,
    /// binary: pow(x, y)
    POW = 22,
    /// binary: x * (1 - x) * y
    SIGMOID_GRAD = 23,
    /// binary: x - y
    SUB = 24,
    /// binary: (x > 0) * y
    SWITCH_GT0 = 25,
    /// binary: (1 - x * x) * y
    TANH_GRAD = 26,
    /// binary: x / y
    TRUE_DIV = 27,
    /// binary: numerically stable log(exp(x) + exp(y))
    LOG_SUM_EXP = 28,
    /// binary: x < y
    LT = 29,
    /// binary: x <= y
    LEQ = 30,
    /// binary: x == y
    EQ = 31,
    /// bitwise binary: x << y. Note that the result is undefined if y < 0 or
    /// y >= bitwidth. Logical shift is performed for unsigned integers, and
    /// arithmetic shift for signed ones.
    SHL = 32,
    /// bitwise binary: x >> y; see SHL mode for more details
    SHR = 33,
    /// ternary: x <= y ? z : 0
    COND_LEQ_MOV = 34,
    /// compute ``a * b + c`` where c must either have the same layout as a or
    /// b, or be a scalar
    FUSE_MUL_ADD3 = 35,
    /// compute ``a * A + b * B`` where a and b must have equal layout, and A
    /// and B must have equal layout. In the inputs, ``b`` and ``B`` can be
    /// swapped
    FUSE_MUL_ADD4 = 36,
    /// binary: max(x+y, 0)
    FUSE_ADD_RELU = 37,
    /// binary: 1/(1+exp(-(x+y)))
    FUSE_ADD_SIGMOID = 38,
    /// binary: tanh(x+y)
    FUSE_ADD_TANH = 39,
    /// unary: rational approximation of tanh(x)
    FAST_TANH = 40,
    /// binary: grad of the rational approximation of tanh(x)
    FAST_TANH_GRAD = 41,
    /// unary: round(x), the nearest integer value to x, rounding halfway cases
    /// away from zero. Float only.
    ROUND = 42,
    /// binary: rounded higher l bits of x * y, where l is the bit length of x.
    RMULH = 43,
    /// binary: atan2(y, x)
    ATAN2 = 44,
    /// unary: erf(x)
    ERF = 45,
    /// unary: inverse function of erf(x)
    ERFINV = 46,
    /// unary: erfc(x)
    ERFC = 47,
    /// unary: inverse function of erfc(x)
    ERFCINV = 48,
    /// unary: x * clip(x + 3, 0, 6) / 6
    H_SWISH = 49,
    /// binary: x < -3 ? 0 : (x > 3 ? y : (2 * x + 3) / 6 * y)
    H_SWISH_GRAD = 50,
    /// binary: hswish(x+y)
    FUSE_ADD_H_SWISH = 51,
    /// unary: !x
    NOT = 52,
    /// binary: x && y
    AND = 53,
    /// binary: x || y
    OR = 54,
    /// binary: x ^ y
    XOR = 55,
    /// unary: x / (1 + exp(-x))
    SILU = 56,
    /// binary: grad(x / (1 + exp(-x)))
    SILU_GRAD = 57,
    /// unary: x Phi(x)
    GELU = 58,
    /// binary: grad(x Phi(x))
    GELU_GRAD = 59,
}
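// Editor's sketch (not part of the generated schema): numpy references for a
// few of the ElemwiseMode formulas above, to make them concrete. The function
// names are made up for illustration.
//
//     import numpy as np
//
//     def h_swish(x):               # H_SWISH: x * clip(x + 3, 0, 6) / 6
//         return x * np.clip(x + 3, 0, 6) / 6
//
//     def fuse_add_relu(x, y):      # FUSE_ADD_RELU: max(x + y, 0)
//         return np.maximum(x + y, 0)
//
//     def log_sum_exp(x, y):        # LOG_SUM_EXP, numerically stable
//         m = np.maximum(x, y)
//         return m + np.log(np.exp(x - m) + np.exp(y - m))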
enum ElemwiseMultiTypeMode : uint {
    /// compute ``a * b + c`` requiring that ``a`` be int16 and ``b`` and ``c``
    /// int32, and the result is int32. This mode is optimized for the
    /// channel-broadcast case, i.e. ``a`` has shape (A, B, C) and ``b`` and
    /// ``c`` have shape (1, C, 1)
    FUSE_MUL_ADD3_INT16x32x32x32 = 0,
    /// compute ``a * b + c`` where the input ``a`` is of an integer type,
    /// ``b`` and ``c`` are both ``float32``, and the result is ``int8``. This
    /// is currently only optimized for ``(1, x)`` broadcast for ``b`` and
    /// ``c``. Computation is carried out in floating point and results are
    /// rounded towards zero with saturated cast to int.
    FUSE_MUL_ADD3_IXxF32xF32xI8 = 1,
    /// Compute ``a >> b``, round the result according to the lower ``b`` bits
    /// of ``a`` and make a saturating conversion to int8; ``a`` should be an
    /// integer tensor and ``b`` an int8 scalar.
    ROUND_SHR_SATURATE_IXxI8xI8 = 2,
    /// Fused operation of an int16 elemwise add, an int16 rounding multiply
    /// high and an int16 to int8 rounding right shift with saturation.
    FUSE_ADD_RMULH_ROUND_SHR_SATURATE_INT16x16x16x8 = 3,
    /// Fused operation of an int32 elemwise add, an int32 rounding multiply
    /// high and an int32 to int8 rounding right shift with saturation.
    FUSE_ADD_RMULH_ROUND_SHR_SATURATE_INT32x32x32x8 = 4,
    /// Compute ``a >> b``, round the result according to the lower ``b`` bits
    /// of ``a`` and make a saturating conversion to int16; ``a`` should be an
    /// integer tensor and ``b`` an int8 scalar.
    ROUND_SHR_SATURATE_IXxI8xI16 = 5,
    /// Fused elemwise add of two quantized int8 tensors with specified output
    /// quantized dtype
    QADD = 6,
    /// Fused elemwise add of two quantized int8 tensors followed by ReLU and
    /// typecvt to specified dtype
    QFUSE_ADD_RELU = 7,
    /// Fused elemwise multiply of two quantized int8 tensors with specified
    /// output quantized dtype
    QMUL = 8,
    /// Fused elemwise min of two quantized int8 tensors with specified output
    /// quantized dtype
    QMIN = 9,
    /// quantized: max(x, y), with specified output quantized dtype
    QMAX = 10,
    /// quantized: x - y
    QSUB = 11,
    /// quantized: x / y
    QTRUE_DIV = 12,
    /// quantized: sigmoid(x + y)
    QFUSE_ADD_SIGMOID = 13,
    /// quantized: tanh(x + y)
    QFUSE_ADD_TANH = 14,
    /// quantized: x > 0 ? x : 0
    QRELU = 15,
    /// quantized: x > 0 ? x : -x
    QABS = 16,
    /// quantized: sigmoid(x)
    QSIGMOID = 17,
    /// quantized: exp(x)
    QEXP = 18,
    /// quantized: tanh(x)
    QTANH = 19,
    /// quantized: x * y + z
    QFUSE_MUL_ADD3 = 20,
    /// quantized: fast_tanh(x)
    QFAST_TANH = 21,
    /// quantized: -x
    QNEGATE = 22,
    /// quantized: acos(x)
    QACOS = 23,
    /// quantized: asin(x)
    QASIN = 24,
    /// quantized: ceil(x)
    QCEIL = 25,
    /// quantized: cos(x)
    QCOS = 26,
    /// quantized: expm1(x)
    QEXPM1 = 27,
    /// quantized: floor(x)
    QFLOOR = 28,
    /// quantized: log(x)
    QLOG = 29,
    /// quantized: log1p(x)
    QLOG1P = 30,
    /// quantized: sin(x)
    QSIN = 31,
    /// quantized: round(x)
    QROUND = 32,
    /// quantized: erf(x)
    QERF = 33,
    /// quantized: erfinv(x)
    QERFINV = 34,
    /// quantized: erfc(x)
    QERFC = 35,
    /// quantized: erfcinv(x)
    QERFCINV = 36,
    /// quantized: abs_grad
    QABS_GRAD = 37,
    /// quantized floor_div
    QFLOOR_DIV = 38,
    /// quantized mod
    QMOD = 39,
    /// quantized sigmoid_grad
    QSIGMOID_GRAD = 40,
    /// quantized switch_gt0
    QSWITCH_GT0 = 41,
    /// quantized tanh_grad
    QTANH_GRAD = 42,
    /// quantized lt
    QLT = 43,
    /// quantized leq
    QLEQ = 44,
    /// quantized eq
    QEQ = 45,
    /// quantized pow
    QPOW = 46,
    /// quantized log_sum_exp
    QLOG_SUM_EXP = 47,
    /// quantized fast_tanh_grad
    QFAST_TANH_GRAD = 48,
    /// quantized atan2
    QATAN2 = 49,
    /// quantized cond_leq_mov
    QCOND_LEQ_MOV = 50,
    /// quantized h_swish
    QH_SWISH = 51,
    /// quantized h_swish(x+y)
    QFUSE_ADD_H_SWISH = 52,
    /// quantized h_swish_grad
    QH_SWISH_GRAD = 53,
    /// compute ``a * b + c`` requiring that ``a`` be int16 and ``b`` and ``c``
    /// float32, and the result is float32.
    FUSE_MUL_ADD3_INT16xF32xF32xF32 = 54,
    /// compute ``a * b`` requiring that ``a`` be int16 and ``b`` float32, and
    /// the result is float32.
    MUL_INT16xF32xF32 = 55,
    /// compute ``a * b + c`` requiring that ``a`` be uint8 and ``b`` and ``c``
    /// float32, and the result is float32.
    FUSE_MUL_ADD3_UINT8xF32xF32xF32 = 56,
}
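// Editor's sketch (not part of the generated schema): a plain-numpy reading
// of ROUND_SHR_SATURATE_IXxI8xI8 above. The exact tie-breaking used by the
// kernels may differ; this assumes round-half-up on the discarded bits.
//
//     import numpy as np
//
//     def round_shr_saturate_i8(a, b):
//         a = a.astype(np.int64)
//         bias = (1 << (b - 1)) if b > 0 else 0  # rounding term for lower b bits
//         out = (a + bias) >> b
//         return np.clip(out, -128, 127).astype(np.int8)  # saturate to int8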
enum MatrixMulFormat : uint {
    /// Normal matrix mul: (M, K) x (K, N) = (M, N)
    DEFAULT = 0,
    /// Split 4 from M and K, better for neon compute: (M/4, K/4, 4(k), 4(m)) x
    /// (K/4, N, 4(k)). If transposeA is set, the layout is (K/4, M/4, 4(k),
    /// 4(m)) x (K/4, N, 4(k))
    MK4 = 1,
    /// Split 8 from M and K, better for neon compute: (M/8, K/8, 8(k), 8(m)) x
    /// (K/8, N, 8(k)). If transposeA is set, the layout is (K/8, M/8, 8(k),
    /// 8(m)) x (K/8, N, 8(k))
    MK8 = 2,
    /// Split 4 from M and K, better for neon dotprod: (M/4, K/4, 4(m), 4(k)) x
    /// (K/4, N, 4(k)). If transposeA is set, the layout is (K/4, M/4, 4(m),
    /// 4(k)) x (K/4, N, 4(k))
    MK4_DOT = 3,
}
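// Editor's sketch (not part of the generated schema): how the MK4 packing of
// the A matrix described above can be produced with numpy; `pack_mk4` is a
// hypothetical helper, and M and K are assumed divisible by 4.
//
//     import numpy as np
//
//     def pack_mk4(a):                         # (M, K) -> (M/4, K/4, 4(k), 4(m))
//         M, K = a.shape
//         return (a.reshape(M // 4, 4, K // 4, 4)  # (M/4, 4(m), K/4, 4(k))
//                  .transpose(0, 2, 3, 1)          # (M/4, K/4, 4(k), 4(m))
//                  .copy())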
enum MatrixMulV0DataType : uint {
    /// input/output both float32/float16
    FLOAT = 0,
    INT8x8x16 = 1,
    INT8x8x32 = 2,
    /// input/output both float16, the internal compute is float32
    FLOAT_IO16xC32 = 3,
    /// input QuantizedAsymm8, output QuantizedS32
    QUINT8x8x32 = 4,
    /// input QuantizedAsymm4, output QuantizedS32
    QUINT4x4x32 = 5,
}
/// Specifies special computation modes, e.g. different combinations of
/// intermediate result data types.
enum MatrixMulV1ComputeMode : uint {
    /// No special requirements on the precision of intermediate results.
    DEFAULT = 0,
    /// Use Float32 accumulator and intermediate result. Only supported when
    /// input and output are Float16.
    FLOAT32 = 1,
}
enum PaddingPaddingMode : uint {
    /// aaaaaa|abcdefgh|hhhhhhh
    REPLICATE = 0,
    /// fedcba|abcdefgh|hgfedcb
    REFLECT = 1,
    /// iiiiii|abcdefgh|iiiiiii
    CONSTANT = 2,
}
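// Editor's sketch (not part of the generated schema): the three border
// diagrams above line up with numpy's padding modes. Note that REFLECT here
// repeats the edge element, i.e. numpy's "symmetric"; numpy's own "reflect"
// matches the REFLECT_101 convention used elsewhere in this file.
//
//     import numpy as np
//
//     x = np.array([1, 2, 3, 4])                        # "abcd"
//     np.pad(x, 2, mode="edge")                         # REPLICATE: aa|abcd|dd
//     np.pad(x, 2, mode="symmetric")                    # REFLECT:   ba|abcd|dc
//     np.pad(x, 2, mode="constant", constant_values=9)  # CONSTANT:  ii|abcd|ii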
enum PoolingV0Mode : uint {
    /// maximum value inside pooling window
    MAX_ = 0,
    /// arithmetic mean of all values inside pooling window. Padding values are
    /// taken into account and are viewed as zero
    AVERAGE = 1,
    /// arithmetic mean of all values inside pooling window. No padding is
    /// used.
    AVERAGE_COUNT_EXCLUDE_PADDING = 2,
}
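// Editor's sketch (not part of the generated schema): the difference between
// the two average modes above, for one window that overlaps two padded zeros:
//
//     import numpy as np
//
//     window = np.array([0, 0, 5, 7])  # two padding zeros + two valid values
//     window.sum() / 4                 # AVERAGE -> 3.0 (padding counted)
//     window[2:].sum() / 2             # AVERAGE_COUNT_EXCLUDE_PADDING -> 6.0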
enum RNNCellNonlineMode : uint {
    IDENTITY = 0,
    RELU = 1,
    TANH = 2,
}
enum ROIAlignV0Mode : uint {
    MAX_ = 0,
    AVERAGE = 1,
}
enum ROIPoolingMode : uint {
    /// maximum value inside pooling window; pooling result would be 0 if
    /// pooling window is empty
    MAX_ = 0,
    /// arithmetic mean of all values inside pooling window; pooling result
    /// would be 0 if pooling window is empty
    AVERAGE = 1,
}
enum ReduceDataType : uint {
    /// input/output are the same data type, and the internal computation type
    /// is chosen by the input/output dtypes and the reduction mode.
    /// Currently, ``DEFAULT`` mode means:
    ///
    /// +--------------------+-----------------------------------+-------------------+
    /// | Input/Output DType | Mode                              | Computation DType |
    /// +====================+===================================+===================+
    /// | FLOAT32            | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT  | FLOAT32           |
    /// +--------------------+-----------------------------------+-------------------+
    /// | FLOAT16            | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT  | FLOAT16           |
    /// +--------------------+-----------------------------------+-------------------+
    /// | INT32              | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT  | INT32             |
    /// +--------------------+-----------------------------------+-------------------+
    /// | INT8               | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT  | INT8              |
    /// +--------------------+-----------------------------------+-------------------+
    /// | QuantizedS8        | MIN/MAX                           | QuantizedS8       |
    /// +--------------------+-----------------------------------+-------------------+
    /// | QuantizedS8        | MEAN/SUM                          | QuantizedS32      |
    /// +--------------------+-----------------------------------+-------------------+
    /// | Quantized8Asymm    | MIN/MAX                           | Quantized8Asymm   |
    /// +--------------------+-----------------------------------+-------------------+
    /// | Quantized8Asymm    | MEAN/SUM                          | QuantizedS32      |
    /// +--------------------+-----------------------------------+-------------------+
    ///
    DEFAULT = 0,
    /// Deprecated. This was replaced by FLOAT_O16xC32, and the input's dtype
    /// is decided by the actual input tensor.
    FLOAT_IO16xC32 = 1,
    /// compute and output are both float32
    FLOAT_O32xC32 = 2,
    /// compute is float32, output is float16
    FLOAT_O16xC32 = 3,
    /// input quint8, compute and output are qint32
    QUINT_I8xO32 = 4,
    /// input qint8, compute and output are qint32
    QINT_I8xO32 = 5,
}
enum ReduceMode : uint {
    SUM = 0,
    /// sum of x * x for each element x
    SUM_SQR = 1,
    PRODUCT = 2,
    MIN_ = 3,
    MAX_ = 4,
    MEAN = 5,
}
enum ReduceV0Mode : uint {
    SUM = 0,
    /// sum of x * x for each element x
    SUM_SQR = 1,
    PRODUCT = 2,
    MIN_ = 3,
    MAX_ = 4,
}
enum ReduceV1DataType : uint {
    /// input/output are the same data type, and the internal computation type
    /// is chosen by the input/output dtypes and the reduction mode.
    /// Currently, ``DEFAULT`` mode means:
    ///
    /// +--------------------+-----------------------------------+-------------------+
    /// | Input/Output DType | Mode                              | Computation DType |
    /// +====================+===================================+===================+
    /// | FLOAT32            | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT  | FLOAT32           |
    /// +--------------------+-----------------------------------+-------------------+
    /// | FLOAT16            | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT  | FLOAT16           |
    /// +--------------------+-----------------------------------+-------------------+
    /// | INT32              | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT  | INT32             |
    /// +--------------------+-----------------------------------+-------------------+
    /// | INT8               | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT  | INT8              |
    /// +--------------------+-----------------------------------+-------------------+
    /// | QuantizedS8        | MIN/MAX                           | QuantizedS8       |
    /// +--------------------+-----------------------------------+-------------------+
    /// | QuantizedS8        | MEAN/SUM                          | QuantizedS32      |
    /// +--------------------+-----------------------------------+-------------------+
    /// | Quantized8Asymm    | MIN/MAX                           | Quantized8Asymm   |
    /// +--------------------+-----------------------------------+-------------------+
    /// | Quantized8Asymm    | MEAN/SUM                          | QuantizedS32      |
    /// +--------------------+-----------------------------------+-------------------+
    ///
    DEFAULT = 0,
    /// Deprecated. This was replaced by FLOAT_O16xC32, and the input's dtype
    /// is decided by the actual input tensor.
    FLOAT_IO16xC32 = 1,
    /// compute and output are both float32
    FLOAT_O32xC32 = 2,
    /// compute is float32, output is float16
    FLOAT_O16xC32 = 3,
    /// input quint8, compute and output are qint32
    QUINT_I8xO32 = 4,
    /// input qint8, compute and output are qint32
    QINT_I8xO32 = 5,
}
enum ReduceV1Mode : uint {
    SUM = 0,
    /// sum of x * x for each element x
    SUM_SQR = 1,
    PRODUCT = 2,
    MIN_ = 3,
    MAX_ = 4,
    MEAN = 5,
}
/// Relayout mode.
///
/// **Naming conventions**
///
/// 1. ``A_B`` means change from layout format ``A`` to ``B``.
/// 2. ``INTER_WEIGHT_xx`` means relayout the weight for faster processing by
///    :attr:`Convolution.Format.NHWCD4` convolutions.
/// 3. A suffix of ``I`` means ``Image2DPack4TensorFormat`` tensor format is
///    used for faster processing on GPUs.
///
/// **Layout definitions**
///
/// * ``NCHW`` layout: ``{N, C, H, W}``
/// * ``NHWC`` layout: ``{N, H, W, C}``
/// * ``NHWCD4`` layout: ``{N, H, (C + 3) / 4, W, 4}``
/// * ``NHWCD4I`` layout: with ``align_axis = 2``
/// * ``NCHW4`` layout: ``{N, C/4, H, W, 4}``
/// * ``NCHW88`` layout: ``{N, C/8, H, W, 8}``
/// * ``CHWN4`` layout: ``{C/4, H, W, N, 4}``
/// * ``NCHW64`` layout: ``{N, C/64, H, W, 64}``
///
/// **Float weight transformation definitions**
///
/// +---------------+---------------------------------+--------------------+--------------------------------------+------+
/// | Sparsity Type | Input Layout                    | Input Req          | Output Layout                        | Axis |
/// +===============+=================================+====================+======================================+======+
/// | DENSE         | ``{OC, IC, FH, FW}``            | ``OC % 4 == 0``    | ``{OC/4, FH, FW, IC, 4}``            | 3    |
/// +---------------+---------------------------------+--------------------+--------------------------------------+------+
/// | GROUP         | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 4 == 0``  | ``{GROUP, OCPG/4, FH, FW, ICPG, 4}`` | 4    |
/// |               |                                 | ``ICPG % 4 == 0``  |                                      |      |
/// +---------------+---------------------------------+--------------------+--------------------------------------+------+
/// | CHAN          | ``{GROUP, 1, 1, FH, FW}``       | ``GROUP % 4 == 0`` | ``{GROUP/4, 1, FH, FW, 4}``          | 1    |
/// +---------------+---------------------------------+--------------------+--------------------------------------+------+
///
/// **Float weight transformation nchw88 definitions**
///
/// +---------------+---------------------------------+--------------------+----------------------------------------+
/// | Sparsity Type | Input Layout                    | Input Req          | Output Layout                          |
/// +===============+=================================+====================+========================================+
/// | DENSE         | ``{OC, IC, FH, FW}``            | ``OC % 8 == 0``    | ``{OC/8, IC/8, FH, FW, 8(IC), 8(OC)}`` |
/// |               |                                 | ``IC % 8 == 0``    |                                        |
/// +---------------+---------------------------------+--------------------+----------------------------------------+
/// | GROUP         | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 8 == 0``  | ``{GROUP, OCPG/8, ICPG/8, FH, FW,      |
/// |               |                                 | ``ICPG % 8 == 0``  | 8(ICPG), 8(OCPG)}``                    |
/// +---------------+---------------------------------+--------------------+----------------------------------------+
/// | CHAN          | ``{GROUP, 1, 1, FH, FW}``       | ``GROUP % 8 == 0`` | ``{GROUP/8, 1, FH, FW, 8}``            |
/// +---------------+---------------------------------+--------------------+----------------------------------------+
///
/// **Int8(DOT) weight transformation definitions**
///
/// +---------------+---------------------------------+--------------------+-------------------------------------------+------+
/// | Sparsity Type | Input Layout                    | Input Req          | Output Layout                             | Axis |
/// +===============+=================================+====================+===========================================+======+
/// | DENSE         | ``{OC, IC, FH, FW}``            | ``OC % 4 == 0``    | ``{OC/4, FH, FW, IC/4, 4, 4}``            | 3    |
/// +---------------+---------------------------------+--------------------+-------------------------------------------+------+
/// | GROUP         | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 4 == 0``  | ``{GROUP, OCPG/4, FH, FW, ICPG/4, 4, 4}`` | 4    |
/// |               |                                 | ``ICPG % 4 == 0``  |                                           |      |
/// +---------------+---------------------------------+--------------------+-------------------------------------------+------+
///
/// Note: the Axis column means the corresponding ``align_axis`` for the image
/// format when the ``I`` suffix is present.
///
/// Note: NCHW_NCHW4_WEIGHT will automatically pad oc and ic; you should remove
/// the padded oc in a later opr by setting the group and oc params with
/// NCHW4_NCHW.
///
enum RelayoutFormatV0Mode : uint {
    NHWC_NHWCD4 = 0,
    NHWCD4_NHWC = 1,
    NHWC_NHWCD4I = 2,
    NCHW_NHWCD4 = 3,
    NCHW_NHWCD4I = 4,
    NHWCD4I_NCHW = 5,
    NHWCD4_NCHW = 6,
    INTER_WEIGHT_DENSE = 7,
    INTER_WEIGHT_DENSEI = 8,
    INTER_WEIGHT_GROUP = 9,
    INTER_WEIGHT_GROUPI = 10,
    INTER_WEIGHT_CHAN = 11,
    INTER_WEIGHT_CHANI = 12,
    INTER_WEIGHT_DENSEI_DOT = 13,
    INTER_WEIGHT_GROUPI_DOT = 14,
    NCHW4_CHWN4 = 15,
    CHWN4_NCHW4 = 16,
    NCHW_NCHW88_CONV_DENSE_WEIGHT = 17,
    NCHW_NCHW88_CONV_CHAN_WEIGHT = 18,
    NCHW_NCHW88_CONV_GROUP_WEIGHT = 19,
    NCHW_NCHW88 = 20,
    NCHW88_NCHW = 21,
    NCHW_NCHW4_IC_SMALL = 22,
    NCHW_NCHW4_IC_SMALL_CONV_DENSE_WEIGHT = 23,
    NCHW_NCHW4 = 24,
    NCHW4_NCHW = 25,
    NCHW_NCHW4_WEIGHT = 26,
    NCHW_NCHW64 = 27,
    NCHW64_NCHW = 28,
    NCHW_NHWC = 29,
    NHWC_NCHW = 30,
    NHWCD4I_NHWC = 31,
}
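// Editor's sketch (not part of the generated schema): the NCHW -> NHWCD4
// relayout implied by the layout definitions above, in numpy; the helper name
// is made up, and C is padded up to a multiple of 4 as described.
//
//     import numpy as np
//
//     def nchw_to_nhwcd4(x):                   # x: (N, C, H, W)
//         n, c, h, w = x.shape
//         cb = (c + 3) // 4                    # number of 4-channel blocks
//         padded = np.zeros((n, cb * 4, h, w), x.dtype)
//         padded[:, :c] = x
//         # (N, CB, 4, H, W) -> (N, H, CB, W, 4)
//         return padded.reshape(n, cb, 4, h, w).transpose(0, 3, 1, 4, 2).copy()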
enum SeparableConvBorderMode : uint {
    BORDER_REPLICATE = 0,
    BORDER_REFLECT = 1,
    BORDER_REFLECT_101 = 2,
    BORDER_WRAP = 3,
    BORDER_CONSTANT = 4,
    BORDER_TRANSPARENT = 5,
    BORDER_ISOLATED = 6,
}
enum SeparableConv3DBorderMode : uint {
    BORDER_REPLICATE = 0,
    BORDER_REFLECT = 1,
    BORDER_REFLECT_101 = 2,
    BORDER_WRAP = 3,
    BORDER_CONSTANT = 4,
    BORDER_TRANSPARENT = 5,
    BORDER_ISOLATED = 6,
}
enum SpatialTfGridGeneratorMode : uint {
    AFFINE = 0,
}
enum SpatialTfSamplerMode : uint {
    BILINEAR = 0,
}
enum TopKMode : uint {
    /// only the value of the k'th element would be computed
    KTH_ONLY = 0,
    /// all the top-k values and corresponding indices would be computed; no
    /// order is guaranteed
    VALUE_IDX_NOSORT = 1,
    /// all the top-k values and corresponding indices sorted
    VALUE_IDX_SORTED = 2,
}
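// Editor's sketch (not part of the generated schema): the three TopK modes
// above, written with numpy for a 1-d input, assuming the smallest-k
// convention; the kernel's ordering convention may differ.
//
//     import numpy as np
//
//     def topk(x, k, mode="VALUE_IDX_SORTED"):
//         if mode == "KTH_ONLY":
//             return np.partition(x, k - 1)[k - 1]  # only the k-th value
//         idx = np.argpartition(x, k - 1)[:k]       # VALUE_IDX_NOSORT: no order
//         if mode == "VALUE_IDX_SORTED":
//             idx = idx[np.argsort(x[idx])]         # sort values ascending
//         return x[idx], idx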
enum WarpPerspectiveV1BorderMode : uint {
    /// aaaaaa|abcdefgh|hhhhhhh
    REPLICATE = 0,
    /// fedcba|abcdefgh|hgfedcb
    REFLECT = 1,
    /// gfedcb|abcdefgh|gfedcba
    REFLECT_101 = 2,
    /// cdefgh|abcdefgh|abcdefg
    WRAP = 3,
    /// iiiiii|abcdefgh|iiiiiii
    CONSTANT = 4,
    TRANSPARENT = 5,
    ISOLATED = 6,
}
enum WarpPerspectiveV1InterpolationMode : uint {
    NEAREST = 0,
    LINEAR = 1,
    AREA = 2,
    CUBIC = 3,
    LANCZOS4 = 4,
}
table Empty {
}
table Axis {
    axis:int = 0;
}
table ConvolutionV0 {
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    data_type:ConvolutionV0DataType = FLOAT;
    sparse:ConvolutionV0Sparse = DENSE;
    /// convolution data/filter/output format; see :class:`RelayoutFormat` for
    /// more details
    format:ConvolutionV0Format = NCHW;
}
table ConvolutionV1 {
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionV0Format = NCHW;
    /// Specifies special computation modes, e.g. different combinations of
    /// intermediate result data types.
    compute_mode:ConvolutionV1ComputeMode = DEFAULT;
}
table Convolution {
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    sparse:ConvolutionV0Sparse = DENSE;
    /// convolution data/filter/output format; see :class:`RelayoutFormat` for
    /// more details
    format:ConvolutionFormat = NCHW;
    compute_mode:ConvolutionV1ComputeMode = DEFAULT;
}
table MaskPropagate {
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// kernel height
    kernel_h:uint = 1;
    /// kernel width
    kernel_w:uint = 1;
    /// dilate height
    dilate_h:uint = 1;
    /// dilate width
    dilate_w:uint = 1;
}
table ConvPooling {
    method:ConvPoolingMethod = WITH_TEXTURE_OBJ;
    convMode:ConvolutionV0Mode = CROSS_CORRELATION;
    poolMode:ConvPoolingPoolMode = AVERAGE;
    nonlineMode:ConvPoolingNonlineMode = IDENTITY;
    pool_shape_h:uint = 1;
    pool_shape_w:uint = 1;
    pool_stride_h:uint = 1;
    pool_stride_w:uint = 1;
    pool_pad_h:uint = 0;
    pool_pad_w:uint = 0;
    conv_stride_h:uint = 1;
    conv_stride_w:uint = 1;
    conv_pad_h:uint = 0;
    conv_pad_w:uint = 0;
}
/// legacy conv_bias
table ConvBiasV0 {
    nonlineMode:ConvBiasV0NonlineMode = IDENTITY;
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    pad_h:uint = 0;
    pad_w:uint = 0;
    stride_h:uint = 1;
    stride_w:uint = 1;
}
/// active(conv(x, w) + bias)
table ConvBiasV1 {
    nonlineMode:ConvBiasV0NonlineMode = IDENTITY;
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    data_type:ConvolutionV0DataType = FLOAT;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionV0Format = NCHW;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
}
/// active(conv(x, w) + bias)
table ConvBiasV2 {
    nonlineMode:ConvBiasV0NonlineMode = IDENTITY;
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionV0Format = NCHW;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    compute_mode:ConvolutionV1ComputeMode = DEFAULT;
}
/// active(conv(x, w) + bias)
table ConvBiasV3 {
    nonlineMode:ConvBiasV0NonlineMode = IDENTITY;
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionV0Format = NCHW;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    /// detailed meaning: \see winograd in conv bias
    output_block_size:uint = 0;
    compute_mode:ConvolutionV1ComputeMode = DEFAULT;
}
/// active(conv(x, w) + bias)
table ConvBias {
    nonlineMode:ConvBiasV0NonlineMode = IDENTITY;
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionFormat = NCHW;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    compute_mode:ConvolutionV1ComputeMode = DEFAULT;
}
table SeparableConv {
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    borderMode:SeparableConvBorderMode = BORDER_REPLICATE;
    is_symm_kernel:bool = true;
    pad_h:uint = 0;
    pad_w:uint = 0;
    stride_h:uint = 1;
    stride_w:uint = 1;
    ksize_h:uint = 3;
    ksize_w:uint = 3;
    anchor_h:uint = 1;
    anchor_w:uint = 1;
}
table Images2Neibs {
    pad_h:uint = 0;
    pad_w:uint = 0;
    stride_h:uint = 1;
    stride_w:uint = 1;
    dilate_h:uint = 1;
    dilate_w:uint = 1;
    window_h:uint = 3;
    window_w:uint = 3;
}
table SlidingWindowTranspose {
    out_h:uint = 0;
    out_w:uint = 0;
    pad_h:uint = 0;
    pad_w:uint = 0;
    stride_h:uint = 1;
    stride_w:uint = 1;
    dilate_h:uint = 1;
    dilate_w:uint = 1;
    window_h:uint = 3;
    window_w:uint = 3;
}
table PoolingV0 {
    mode:PoolingV0Mode = MAX_;
    pad_h:uint = 0;
    pad_w:uint = 0;
    stride_h:uint = 2;
    stride_w:uint = 2;
    window_h:uint = 2;
    window_w:uint = 2;
    format:ConvolutionV0Format = NCHW;
}
table Pooling {
    mode:PoolingV0Mode = MAX_;
    pad_h:uint = 0;
    pad_w:uint = 0;
    stride_h:uint = 2;
    stride_w:uint = 2;
    window_h:uint = 2;
    window_w:uint = 2;
    format:ConvolutionFormat = NCHW;
}
table Softmax {
    axis:int = -1;
}
table AdaptivePoolingV0 {
    mode:PoolingV0Mode = MAX_;
    format:ConvolutionV0Format = NCHW;
}
table AdaptivePooling {
    mode:PoolingV0Mode = MAX_;
    format:ConvolutionFormat = NCHW;
}
/// see ImageNet Classification with Deep Convolutional Neural Networks for the
/// meaning of the fields
table LRN {
    /// must be odd
    n:uint = 5;
    k:float = 2.;
    alpha:float = 1e-4;
    beta:float = 0.75;
}
table BN {
    param_dim:BNParamDim = DIM_11HW;
    fwd_mode:BNFwdMode = TRAINING;
    epsilon:double = 1e-4;
    avg_factor:double = 1.;
    scale:float = 1.;
    bias:float = 0.;
}
table ROIPooling {
    mode:ROIPoolingMode = MAX_;
    scale:float = 1.;
}
table WarpPerspectiveV1 {
    imode:WarpPerspectiveV1InterpolationMode = LINEAR;
    bmode:WarpPerspectiveV1BorderMode = REPLICATE;
    format:ConvolutionV0Format = NCHW;
    /// used for CONSTANT bmode
    border_val:float = .0;
}
table WarpPerspective {
    imode:WarpPerspectiveV1InterpolationMode = LINEAR;
    bmode:WarpPerspectiveV1BorderMode = REPLICATE;
    format:ConvolutionFormat = NCHW;
    /// used for CONSTANT bmode
    border_val:float = .0;
}
table SpatialTfGridGenerator {
    mode:SpatialTfGridGeneratorMode = AFFINE;
}
table SpatialTfSampler {
    mode:SpatialTfSamplerMode = BILINEAR;
}
table AddUpdate {
    alpha:float = 1.;
    beta:float = 1.;
    bias:float = 0.;
}
table Elemwise {
    mode:ElemwiseMode = RELU;
}
table ElemwiseMultiType {
    mode:ElemwiseMultiTypeMode = FUSE_MUL_ADD3_INT16x32x32x32;
}
/// power with constant exponent
table PowC {
    exp:float = 0;
}
/// 2d discrete cosine transform
table DctChannelSelectV0 {
    format:ConvolutionV0Format = NCHW;
    fastImpl:DctChannelSelectV0FastImpl = NONE;
    dct_block_size:int = 8;
}
/// 2d discrete cosine transform
table DctChannelSelect {
    format:ConvolutionFormat = NCHW;
    fastImpl:DctChannelSelectV0FastImpl = NONE;
    dct_block_size:int = 8;
}
table MatrixMulV0 {
    transposeA:bool = false;
    transposeB:bool = false;
    data_type:MatrixMulV0DataType = FLOAT;
}
table MatrixMulV1 {
    transposeA:bool = false;
    transposeB:bool = false;
    /// Specifies special computation modes, e.g. different combinations of
    /// intermediate result data types.
    compute_mode:MatrixMulV1ComputeMode = DEFAULT;
}
table MatrixMul {
    transposeA:bool = false;
    transposeB:bool = false;
    compute_mode:MatrixMulV1ComputeMode = DEFAULT;
    format:MatrixMulFormat = DEFAULT;
}
table SVD {
    /// Whether to compute the full-sized u and v or only the leading min(m, n)
    /// singular vectors. Ignored if compute_uv is false.
    full_matrices:bool = false;
    /// Whether the left (u) and right (v) singular vectors will be computed
    /// and outputted.
    compute_uv:bool = true;
}
/// legacy reduce
table ReduceV0 {
    mode:ReduceV0Mode = SUM;
    /// axis along which reduction is performed; if -1 is given, reduce to
    /// given target shape (only used in megbrain)
    axis:int = -1;
}
/// reduce along given axis
table ReduceV1 {
    mode:ReduceV1Mode = SUM;
    /// axis along which reduction is performed; if -1 is given, reduce to
    /// given target shape (only used in megbrain)
    axis:int = -1;
    data_type:ReduceV1DataType = DEFAULT;
}
/// reduce along given axis
table Reduce {
    mode:ReduceMode = SUM;
    /// axis along which reduction is performed; if INT_MAX is given, reduce to
    /// given target shape (only used in megbrain)
    axis:int = 2147483647;
    data_type:ReduceDataType = DEFAULT;
}
/// calculate accumulated sum along given axis
table CumsumV0 {
    /// axis along which cumsum is performed
    axis:int = -1;
    /// whether the cumsum is exclusive, i.e. the current element is not taken
    /// into account
    exclusive:bool = true;
    /// whether the cumsum is forward or backward
    reverse:bool = false;
}
/// calculate accumulated sum along given axis
table Cumsum {
    /// axis along which cumsum is performed, default with INT_MAX
    axis:int = 2147483647;
    /// whether the cumsum is exclusive, i.e. the current element is not taken
    /// into account
    exclusive:bool = true;
    /// whether the cumsum is forward or backward
    reverse:bool = false;
}
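// Editor's sketch (not part of the generated schema): the exclusive/reverse
// flags of Cumsum above, under the standard reading of "exclusive" (element i
// receives the sum of the elements before i):
//
//     import numpy as np
//
//     def cumsum(x, exclusive=True, reverse=False):
//         if reverse:                      # accumulate from the last element
//             return cumsum(x[::-1], exclusive)[::-1]
//         s = np.cumsum(x)
//         if exclusive:                    # drop the current element's term
//             s = np.concatenate(([0], s[:-1]))
//         return s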
table CondTake {
    mode:CondTakeMode = EQ;
    /// the value to be compared with; note that for integer data, val is also
    /// converted to int
    val:float = 0;
    /// used for float equality comparison
    eps:float = 1e-06;
}
table Argsort {
    order:ArgsortOrder = ASCENDING;
}
table IndexingRemap {
    /// Whether no two dst elements map to the same src element. Enabling this
    /// option can accelerate the gradient operator since atomic adding
    /// operations could be avoided.
    is_non_overlapping:bool = false;
}
table Sleep {
    /// time to sleep in seconds
    time:float = 0;
}
table Linspace {
    /// Whether stop is included in the generated tensor
    endpoint:bool = true;
}
table LinspaceFull {
    /// The first val.
    start:double = 0;
    /// The last val.
    stop:double = 1;
    /// Whether stop is included in the generated tensor
    endpoint:bool = true;
}
table Eye {
    /// Index of the diagonal: 0 (the default) refers to the main diagonal, a
    /// positive value refers to an upper diagonal, and a negative value to a
    /// lower diagonal.
    k:int = 0;
    /// data type of output value
    dtype:DTypeEnum = Float32;
}
table Diag {
    /// Index of the diagonal: 0 (the default) refers to the main diagonal, a
    /// positive value refers to an upper diagonal, and a negative value to a
    /// lower diagonal.
    k:int = 0;
}
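// Editor's sketch (not part of the generated schema): the diagonal index k
// used by Eye and Diag above follows the same convention as numpy:
//
//     import numpy as np
//
//     np.eye(3, k=1)               # ones on the first upper diagonal
//     np.eye(3, k=-1)              # ones on the first lower diagonal
//     np.diag(np.arange(3), k=1)   # embed a vector on an upper diagonal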
  1287. table UniformRNGV0 {
  1288. seed:ulong = 0;
  1289. }
  1290. table UniformRNG {
  1291. seed:ulong = 0;
  1292. /// The dtype of output Tensor. Only support Float32.
  1293. dtype:DTypeEnum = Float32;
  1294. }
  1295. table GaussianRNGV0 {
  1296. seed:ulong = 0;
  1297. mean:float = 0;
  1298. std:float = 1;
  1299. }
  1300. table GaussianRNG {
  1301. seed:ulong = 0;
  1302. mean:float = 0;
  1303. std:float = 1;
  1304. /// The dtype of output Tensor. Only support Float32.
  1305. dtype:DTypeEnum = Float32;
  1306. }
  1307. table GammaRNG {
  1308. seed:ulong = 0;
  1309. }
  1310. table BetaRNG {
  1311. seed:ulong = 0;
  1312. }
  1313. table PoissonRNG {
  1314. seed:ulong = 0;
  1315. }
  1316. table PermutationRNG {
  1317. seed:ulong = 0;
  1318. /// The dtype of output Tensor. Int32, Int16 and Float32 are supported.
  1319. dtype:DTypeEnum = Int32;
  1320. }
  1321. table ShuffleRNG {
  1322. seed:ulong = 0;
  1323. }
  1324. table Flip {
  1325. vertical:bool = false;
  1326. horizontal:bool = false;
  1327. }
  1328. table Rotate {
  1329. clockwise:bool = true;
  1330. }
  1331. table ROICopy {
  1332. row_from:uint = 0;
  1333. row_to:uint = 0;
  1334. col_from:uint = 0;
  1335. col_to:uint = 0;
  1336. }
  1337. table CvtColor {
  1338. mode:CvtColorMode = RGB2GRAY;
  1339. }
  1340. table WarpAffineV0 {
  1341. imode:WarpPerspectiveV1InterpolationMode = LINEAR;
  1342. border_mode:WarpPerspectiveV1BorderMode = REPLICATE;
  1343. /// used for CONSTANT bmode
  1344. border_val:float = .0;
  1345. }
  1346. table WarpAffineV1 {
  1347. imode:WarpPerspectiveV1InterpolationMode = LINEAR;
  1348. border_mode:WarpPerspectiveV1BorderMode = REPLICATE;
  1349. /// used for CONSTANT bmode
  1350. border_val:float = .0;
  1351. format:ConvolutionV0Format = NHWC;
  1352. }
  1353. table WarpAffine {
  1354. imode:WarpPerspectiveV1InterpolationMode = LINEAR;
  1355. border_mode:WarpPerspectiveV1BorderMode = REPLICATE;
  1356. /// used for CONSTANT bmode
  1357. border_val:float = .0;
  1358. format:ConvolutionFormat = NHWC;
  1359. }
  1360. table GaussianBlur {
  1361. border_mode:WarpPerspectiveV1BorderMode = REPLICATE;
  1362. kernel_height:uint = 0;
  1363. kernel_width:uint = 0;
  1364. sigma_x:float = 0.;
  1365. sigma_y:float = 0.;
  1366. }
  1367. table ResizeV0 {
  1368. imode:WarpPerspectiveV1InterpolationMode = LINEAR;
  1369. }
  1370. table ResizeV1 {
  1371. imode:WarpPerspectiveV1InterpolationMode = LINEAR;
  1372. format:ConvolutionV0Format = NHWC;
  1373. }
  1374. table Resize {
  1375. imode:WarpPerspectiveV1InterpolationMode = LINEAR;
  1376. format:ConvolutionFormat = NHWC;
  1377. }
  1378. table RemapV0 {
  1379. imode:WarpPerspectiveV1InterpolationMode = LINEAR;
  1380. border_type:WarpPerspectiveV1BorderMode = REPLICATE;
  1381. format:ConvolutionV0Format = NHWC;
  1382. scalar:float = 0.;
  1383. }
  1384. table Remap {
  1385. imode:WarpPerspectiveV1InterpolationMode = LINEAR;
  1386. border_type:WarpPerspectiveV1BorderMode = REPLICATE;
  1387. format:ConvolutionFormat = NHWC;
  1388. scalar:float = 0.;
  1389. }
  1390. table Convolution3D {
  1391. mode:Convolution3DMode = CROSS_CORRELATION;
  1392. /// padding on one side on the first dimension
  1393. pad_d:uint = 0;
  1394. /// padding on one side on the second dimension
  1395. pad_h:uint = 0;
  1396. /// padding on one side on the third dimension
  1397. pad_w:uint = 0;
  1398. /// kernel stride on the first dimension
  1399. stride_d:uint = 1;
  1400. /// kernel stride on the second dimension
  1401. stride_h:uint = 1;
  1402. /// kernel stride on the third dimension
  1403. stride_w:uint = 1;
  1404. /// dilation (i.e. size of each zero-padded kernel block) on the first
  1405. /// dimension
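    /// (for dilation ``d`` and kernel size ``k``, the effective kernel extent
    /// along a dimension is ``d * (k - 1) + 1``)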
    dilate_d:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the third
    /// dimension
    dilate_w:uint = 1;
    sparse:Convolution3DSparse = DENSE;
    data_type:Convolution3DDataType = FLOAT;
    format:Convolution3DFormat = NCDHW;
}
table Conv3DBias {
    nonlineMode:Conv3DBiasNonlineMode = IDENTITY;
    mode:Convolution3DMode = CROSS_CORRELATION;
    pad_d:uint = 0;
    pad_h:uint = 0;
    pad_w:uint = 0;
    stride_d:uint = 1;
    stride_h:uint = 1;
    stride_w:uint = 0;
}
table SeparableConv3D {
    mode:Convolution3DMode = CROSS_CORRELATION;
    borderMode:SeparableConv3DBorderMode = BORDER_REPLICATE;
    is_symm_kernel:bool = true;
    pad_d:uint = 0;
    pad_h:uint = 0;
    pad_w:uint = 0;
    stride_d:uint = 0;
    stride_h:uint = 1;
    stride_w:uint = 1;
    ksize_d:uint = 0;
    ksize_h:uint = 3;
    ksize_w:uint = 3;
    anchor_d:uint = 0;
    anchor_h:uint = 1;
    anchor_w:uint = 1;
}
table TopK {
    mode:TopKMode = KTH_ONLY;
}
/// Change the tensor layout format
table RelayoutFormatV0 {
    /// Relayout mode.
    ///
    /// **Naming conventions**
    ///
    /// 1. ``A_B`` means change from layout format ``A`` to ``B``.
    /// 2. ``INTER_WEIGHT_xx`` means relayouting the weight for faster processing
    ///    by :attr:`Convolution.Format.NHWCD4` convolutions.
    /// 3. A suffix of ``I`` means the ``Image2DPack4TensorFormat`` tensor format
    ///    is used for faster processing on GPUs.
    ///
    /// **Layout definitions**
    ///
    /// * ``NCHW`` layout: ``{N, C, H, W}``
    /// * ``NHWC`` layout: ``{N, H, W, C}``
    /// * ``NHWCD4`` layout: ``{N, H, (C + 3) / 4, W, 4}``
    /// * ``NHWCD4I`` layout: with ``align_axis = 2``
    /// * ``NCHW4`` layout: ``{N, C/4, H, W, 4}``
    /// * ``NCHW88`` layout: ``{N, C/8, H, W, 8}``
    /// * ``CHWN4`` layout: ``{C/4, H, W, N, 4}``
    /// * ``NCHW64`` layout: ``{N, C/64, H, W, 64}``
    ///
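    /// For example, per the layout definitions above, relayouting a
    /// ``{2, 8, 16, 16}`` ``NCHW`` tensor to ``NCHW4`` yields ``{2, 2, 16, 16, 4}``.
    ///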
    /// **Float weight transformation definitions**
    ///
    /// +---------------+---------------------------------+--------------------+--------------------------------------+------+
    /// | Sparsity Type | Input Layout                    | Input Req          | Output Layout                        | Axis |
    /// +===============+=================================+====================+======================================+======+
    /// | DENSE         | ``{OC, IC, FH, FW}``            | ``OC % 4 == 0``    | ``{OC/4, FH, FW, IC, 4}``            | 3    |
    /// +---------------+---------------------------------+--------------------+--------------------------------------+------+
    /// | GROUP         | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 4 == 0``  | ``{GROUP, OCPG/4, FH, FW, ICPG, 4}`` | 4    |
    /// |               |                                 | ``ICPG % 4 == 0``  |                                      |      |
    /// +---------------+---------------------------------+--------------------+--------------------------------------+------+
    /// | CHAN          | ``{GROUP, 1, 1, FH, FW}``       | ``GROUP % 4 == 0`` | ``{GROUP/4, 1, FH, FW, 4}``          | 1    |
    /// +---------------+---------------------------------+--------------------+--------------------------------------+------+
    ///
    /// **Float weight transformation nchw88 definitions**
    ///
    /// +---------------+---------------------------------+--------------------+--------------------------------------+
    /// | Sparsity Type | Input Layout                    | Input Req          | Output Layout                        |
    /// +===============+=================================+====================+======================================+
    /// | DENSE         | ``{OC, IC, FH, FW}``            | ``OC % 8 == 0``    |``{OC/8, IC/8, FH, FW, 8(IC), 8(OC)}``|
    /// |               |                                 | ``IC % 8 == 0``    |                                      |
    /// +---------------+---------------------------------+--------------------+--------------------------------------+
    /// | GROUP         | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 8 == 0``  | ``{GROUP, OCPG/8, ICPG/8, FH, FW,    |
    /// |               |                                 | ``ICPG % 8 == 0``  | 8(ICPG), 8(OCPG)}``                  |
    /// +---------------+---------------------------------+--------------------+--------------------------------------+
    /// | CHAN          | ``{GROUP, 1, 1, FH, FW}``       | ``GROUP % 8 == 0`` | ``{GROUP/8, 1, FH, FW, 8}``          |
    /// +---------------+---------------------------------+--------------------+--------------------------------------+
    ///
    /// **Int8(DOT) weight transformation definitions**
    ///
    /// +---------------+---------------------------------+--------------------+------------------------------------------+------+
    /// | Sparsity Type | Input Layout                    | Input Req          | Output Layout                            | Axis |
    /// +===============+=================================+====================+==========================================+======+
    /// | DENSE         | ``{OC, IC, FH, FW}``            | ``OC % 4 == 0``    | ``{OC/4, FH, FW, IC/4, 4, 4}``           | 3    |
    /// +---------------+---------------------------------+--------------------+------------------------------------------+------+
    /// | GROUP         | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 4 == 0``  | ``{GROUP, OCPG/4, FH, FW, ICPG/4, 4, 4}``| 4    |
    /// |               |                                 | ``ICPG % 4 == 0``  |                                          |      |
    /// +---------------+---------------------------------+--------------------+------------------------------------------+------+
    ///
    /// Note: the axis column gives the corresponding ``align_axis`` for the image
    /// format when the ``I`` suffix is present.
    ///
    /// Note: ``NCHW_NCHW4_WEIGHT`` automatically pads ``oc`` and ``ic``; remove the
    /// padded ``oc`` in a later opr by setting the ``group`` and ``oc`` params with
    /// ``NCHW4_NCHW``.
    ///
    mode:RelayoutFormatV0Mode = NHWC_NHWCD4;
}
/// Change the tensor layout format
table RelayoutFormat {
    mode:RelayoutFormatV0Mode = NHWC_NHWCD4;
    oc:uint = 0;
    group:uint = 1;
}
table SeparableFilterV0 {
    format:ConvolutionV0Format = NCHW;
    borderMode:WarpPerspectiveV1BorderMode = REPLICATE;
    is_symm_kernel:bool = true;
    ksize_h:uint = 3;
    ksize_w:uint = 3;
    anchor_h:uint = 1;
    anchor_w:uint = 1;
}
table SeparableFilter {
    format:ConvolutionFormat = NCHW;
    borderMode:WarpPerspectiveV1BorderMode = REPLICATE;
    is_symm_kernel:bool = true;
    ksize_h:uint = 3;
    ksize_w:uint = 3;
    anchor_h:uint = 1;
    anchor_w:uint = 1;
}
/// Local share convolution
table LocalShareV0 {
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    /// spatial groups on the first dimension
    spatial_groups_h:uint = 1;
    /// spatial groups on the second dimension
    spatial_groups_w:uint = 1;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionV0Format = NCHW;
    computeMode:ConvolutionV1ComputeMode = DEFAULT;
}
/// Local share convolution
table LocalShare {
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    /// spatial groups on the first dimension
    spatial_groups_h:uint = 1;
    /// spatial groups on the second dimension
    spatial_groups_w:uint = 1;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionFormat = NCHW;
    computeMode:ConvolutionV1ComputeMode = DEFAULT;
}
table ROIAlignV0 {
    mode:ROIAlignV0Mode = MAX_;
    format:ConvolutionV0Format = NCHW;
    spatial_scale:float = 1.0;
    offset:float = 0.0;
    pooled_height:uint = 1;
    pooled_width:uint = 1;
    sample_height:uint = 2;
    sample_width:uint = 2;
}
table ROIAlign {
    mode:ROIAlignV0Mode = MAX_;
    format:ConvolutionFormat = NCHW;
    spatial_scale:float = 1.0;
    offset:float = 0.0;
    pooled_height:uint = 1;
    pooled_width:uint = 1;
    sample_height:uint = 2;
    sample_width:uint = 2;
}
table Correlation {
    format:ConvolutionV0Format = NCHW;
    kernel_size:uint = 1;
    max_displacement:uint = 1;
    stride1:uint = 1;
    stride2:uint = 1;
    pad_size:uint = 0;
    is_multiply:bool = true;
}
table DeformablePSROIPooling {
    no_trans:bool = true;
    spatial_scale:float = 1;
    trans_std:float = 1;
    /// height of pooling output
    pooled_h:uint = 1;
    /// width of pooling output
    pooled_w:uint = 1;
    /// size of each deformable part
    part_size:uint = 1;
    /// sample count of each bbox
    sample_per_part:uint = 1;
}
/// Batch convolution (unshared weights on the batch dimension)
table BatchConvBiasV0 {
    nonlineMode:ConvBiasV0NonlineMode = IDENTITY;
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionV0Format = NCHW;
    compute_mode:ConvolutionV1ComputeMode = DEFAULT;
}
/// Batch convolution (unshared weights on the batch dimension)
table BatchConvBias {
    nonlineMode:ConvBiasV0NonlineMode = IDENTITY;
    mode:ConvolutionV0Mode = CROSS_CORRELATION;
    /// padding on one side on the first dimension
    pad_h:uint = 0;
    /// padding on one side on the second dimension
    pad_w:uint = 0;
    /// kernel stride on the first dimension
    stride_h:uint = 1;
    /// kernel stride on the second dimension
    stride_w:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the first
    /// dimension
    dilate_h:uint = 1;
    /// dilation (i.e. size of each zero-padded kernel block) on the second
    /// dimension
    dilate_w:uint = 1;
    sparse:ConvolutionV0Sparse = DENSE;
    format:ConvolutionFormat = NCHW;
    compute_mode:ConvolutionV1ComputeMode = DEFAULT;
}
table FakeQuant {
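    /// lower bound of the quantized value range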
    qmin:int = -2147483648;
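    /// upper bound of the quantized value range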
    qmax:int = 2147483647;
}
table TQT {
    qmin:int = -2147483648;
    qmax:int = 2147483647;
}
table LSQ {
    qmin:int = -2147483648;
    qmax:int = 2147483647;
}
table Fill {
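    /// value used to fill every element of the output tensor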
    value:float = 0;
}
table CheckNonFinite {
    scale:float = 1.0;
}
table Padding {
    /// front offset in dim 0
    front_offset_dim0:uint = 0;
    /// front offset in dim 1
    front_offset_dim1:uint = 0;
    /// front offset in dim 2
    front_offset_dim2:uint = 0;
    /// front offset in dim 3
    front_offset_dim3:uint = 0;
    /// front offset in dim 4
    front_offset_dim4:uint = 0;
    /// front offset in dim 5
    front_offset_dim5:uint = 0;
    /// front offset in dim 6
    front_offset_dim6:uint = 0;
    /// back offset in dim 0
    back_offset_dim0:uint = 0;
    /// back offset in dim 1
    back_offset_dim1:uint = 0;
    /// back offset in dim 2
    back_offset_dim2:uint = 0;
    /// back offset in dim 3
    back_offset_dim3:uint = 0;
    /// back offset in dim 4
    back_offset_dim4:uint = 0;
    /// back offset in dim 5
    back_offset_dim5:uint = 0;
    /// back offset in dim 6
    back_offset_dim6:uint = 0;
    /// value filled into the padded regions when ``padding_mode`` is CONSTANT
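    /// (e.g., with ``front_offset_dim1 = 1`` and ``back_offset_dim1 = 2`` a
    /// ``{2, 3}`` input becomes ``{2, 6}``, the new elements taking this value
    /// under CONSTANT mode; illustrative example)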
    padding_val:float = 0;
    padding_mode:PaddingPaddingMode = CONSTANT;
}
table LayerNorm {
    affine:bool = true;
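    /// epsilon added to the variance for numerical stability, as in the usual
    /// normalization formula ``y = (x - mean) / sqrt(var + eps)``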
    eps:float = 1e-5;
    normalized_dim:ulong = 1;
    normalized_size:ulong = 1;
}
table GroupNorm {
    affine:bool = true;
    eps:float = 1e-5;
    group:uint = 1;
    format:ConvolutionFormat = NCHW;
}
table Dropout {
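    /// probability that each element is zeroed during training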
    drop_prob:float = 0;
    seed:ulong = 0;
}
table RNNCell {
    nonlineMode:RNNCellNonlineMode = IDENTITY;
}
table RNN {
    /// Number of recurrent layers
    num_layers:uint = 1;
    /// If true, becomes a bidirectional RNN
    bidirectional:bool = false;
    /// If true, the layer uses bias weights ``b_ih`` and ``b_hh``
    bias:bool = true;
    /// The number of features in the hidden state
    hidden_size:uint = 128;
    /// If nonzero, introduces a Dropout layer with this probability on the
    /// outputs of each RNN layer
    dropout:float = 0.0;
    nonlineMode:RNNCellNonlineMode = IDENTITY;
    fwd_mode:BNFwdMode = TRAINING;
}
table LSTM {
    /// Number of recurrent layers
    num_layers:uint = 1;
    /// If true, becomes a bidirectional LSTM
    bidirectional:bool = false;
    /// If true, the layer uses bias weights ``b_ih`` and ``b_hh``
    bias:bool = true;
    /// The number of features in the hidden state
    hidden_size:uint = 128;
    /// If nonzero, use an LSTM with projections of the corresponding size
    proj_size:uint = 0;
    /// If nonzero, introduces a Dropout layer with this probability on the
    /// outputs of each LSTM layer
    dropout:float = 0.0;
    fwd_mode:BNFwdMode = TRAINING;
}