
opr_param_defs.py

  1. pdef('Empty')
  2. pdef('Axis').add_fields('int32', 'axis', 0)
  3. (pdef('Convolution', version=0, is_legacy=True).
  4. add_enum('Mode', 'CROSS_CORRELATION = 0', 'CONVOLUTION = 1').
  5. add_fields(
  6. 'uint32',
  7. Doc('pad_h', 'padding on one side on the first dimension'), 0,
  8. Doc('pad_w', 'padding on one side on the second dimension'), 0,
  9. Doc('stride_h', 'kernel stride on the first dimension'), 1,
  10. Doc('stride_w', 'kernel stride on the second dimension'), 1,
  11. Doc('dilate_h', 'dilation (i.e. size of each zero-padded kernel block) '
  12. 'on the first dimension'), 1,
  13. Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) '
  14. 'on the second dimension'), 1
  15. ).
  16. add_enum('DataType',
  17. Doc('FLOAT = 0', 'input/output both float32/float16'),
  18. 'INT8x8x16 = 1',
  19. 'INT8x8x32 = 2',
  20. Doc('FLOAT_IO16xC32 = 3', 'input/output both float16, the internal '
  21. 'compute is float32'),
  22. Doc('QUINT8x8x32 = 4', 'input QuantizedAsymm8, output QuantizedS32'),
  23. Doc('INT8x8xX = 5', 'input int8, output specified by tensor DType'),
  24. Doc('QUINT4x4x32 = 6', 'input QuantizedAsymm4, output QuantizedS32'),
  25. name_field='data_type').
  26. add_enum('Sparse',
  27. Doc('DENSE = 0', 'dense convolution: filter shape should be '
  28. '[oc, ic, spatial...] if format is NCHW, '
  29. '[oc, spatial..., ic] if format is NHWC'),
  30. Doc('GROUP = 1', 'group convolution: filter shape should be '
  31. '[group, oc_per_group, ic_per_group, spatial...] if format is NCHW, '
  32. '[group, oc_per_group, spatial..., ic_per_group] if format is NHWC')
  33. ).
  34. add_enum(Doc('Format', 'convolution data/filter/output format; see '
  35. ':class:`RelayoutFormat` for more details'),
  36. 'NCHW = 0', 'NHWC = 1', 'NHWCD4 = 2', 'NCHW4 = 3', 'NCHW8 = 4', 'NCHW32 = 5', 'NCHW88 = 6',
  37. 'NCHW44 = 7','NCHW44_DOT = 8',
  38. Doc('NCHW_WINOGRAD = 9', 'NCHW layout with weights transformed by winograd'),
  39. Doc('NCHW88_WINOGRAD = 10', 'NCHW88 layout with weights transformed by winograd'),
  40. Doc('NCHW44_WINOGRAD = 11', 'NCHW44 layout with weights transformed by winograd'),
  41. Doc('NCHW4_NCHW32 = 12', 'NCHW4_NCHW32 means input tensors are nchw4 layout, output tensor is nchw32 layout'),
  42. Doc('NCHW32_NCHW4 = 13', 'NCHW32_NCHW4 means input tensors are nchw32 layout, output tensor is nchw4 layout'),
  43. Doc('NCHW4_NCHW = 14', 'NCHW4_NCHW means input tensors are nchw4 layout, output tensor is nchw layout'),
  44. Doc('NHWC_NCHW = 15', 'NHWC_NCHW means input tensors are nhwc layout, '
  45. 'output tensor is nchw layout'),
  46. Doc('NHWC_NCHW4_IC_SMALL = 16', 'NHWC_NCHW4_IC_SMALL means input tensors are nhwc(c < 4) layout, '
  47. 'output tensor is nchw4 layout, padding c=4'),
  48. Doc('NCHW_NCHW4_IC_SMALL = 17', 'NCHW_NCHW4_IC_SMALL means input tensors are nchw(c < 4) layout, '
  49. 'output tensor is nchw4 layout, padding c=4'),
  50. Doc('CHWN4 = 18', 'CHWN4 is currently only used on Nvidia platform for fast implementation '
  51. 'of convolution using CUDA/SASS. The channels are split into groups of 4 channels.'),
  52. Doc('NCHW4_NHWC = 19', 'NCHW4_NHWC means input tensors are nchw4 layout, output tensor is nhwc layout'))
  53. )
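# --- Editorial sketch (not part of the original file) -----------------------
# The pad/stride/dilate fields defined above determine the output spatial size
# of a convolution. A minimal sketch of that arithmetic, assuming the usual
# cross-correlation shape rule; `conv_out_size` is an illustrative helper, not
# a MegEngine API.
def conv_out_size(in_size, kernel, pad, stride, dilate):
    # effective kernel extent once (dilate - 1) zeros are inserted between taps
    effective = dilate * (kernel - 1) + 1
    return (in_size + 2 * pad - effective) // stride + 1

# e.g. a 3x3 kernel with pad_h=1, stride_h=1, dilate_h=1 keeps the height
# unchanged, while stride_h=2 roughly halves it.
assert conv_out_size(32, 3, pad=1, stride=1, dilate=1) == 32
assert conv_out_size(32, 3, pad=1, stride=2, dilate=1) == 16
# -----------------------------------------------------------------------------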
  54. (pdef('Convolution', version=1, is_legacy=True).
  55. add_enum_alias('Mode', 'ConvolutionV0').
  56. add_fields(
  57. 'uint32',
  58. Doc('pad_h', 'padding on one side on the first dimension'), 0,
  59. Doc('pad_w', 'padding on one side on the second dimension'), 0,
  60. Doc('stride_h', 'kernel stride on the first dimension'), 1,
  61. Doc('stride_w', 'kernel stride on the second dimension'), 1,
  62. Doc('dilate_h', 'dilation (i.e. size of each zero-padded kernel block) '
  63. 'on the first dimension'), 1,
  64. Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) '
  65. 'on the second dimension'), 1
  66. ).
  67. add_enum_alias('Sparse', 'ConvolutionV0').
  68. add_enum_alias('Format', 'ConvolutionV0').
  69. add_enum(Doc('ComputeMode', 'Specifies special computation modes, e.g. '
  70. 'different combinations of intermediate result '
  71. 'data types.'),
  72. Doc('DEFAULT = 0', 'No special requirements on the precision of '
  73. 'intermediate results.'),
  74. Doc('FLOAT32 = 1', 'Use Float32 accumulator and intermediate result. '
  75. 'Only supported when input and output is Float16.'),
  76. name_field='compute_mode')
  77. )
  78. (pdef('Convolution', version=2).
  79. add_enum_alias('Mode', 'ConvolutionV0').
  80. add_fields(
  81. 'uint32',
  82. Doc('pad_h', 'padding on one side on the first dimension'), 0,
  83. Doc('pad_w', 'padding on one side on the second dimension'), 0,
  84. Doc('stride_h', 'kernel stride on the first dimension'), 1,
  85. Doc('stride_w', 'kernel stride on the second dimension'), 1,
  86. Doc('dilate_h', 'dilation (i.e. size of each zero-padded kernel block) '
  87. 'on the first dimension'), 1,
  88. Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) '
  89. 'on the second dimension'), 1
  90. ).
  91. add_enum_alias('Sparse', 'ConvolutionV0').
  92. add_enum(Doc('Format', 'convolution data/filter/output format; see '
  93. ':class:`RelayoutFormat` for more details'),
  94. 'NCHW = 0', 'NHWC = 1', 'NHWCD4 = 2', 'NCHW4 = 3', 'NCHW8 = 4', 'NCHW32 = 5', 'NCHW88 = 6',
  95. 'NCHW44 = 7','NCHW44_DOT = 8',
  96. Doc('NCHW4_NCHW32 = 9', 'NCHW4_NCHW32 means input tensors are nchw4 layout, output tensor is nchw32 layout'),
  97. Doc('NCHW32_NCHW4 = 10', 'NCHW32_NCHW4 means input tensors are nchw32 layout, output tensor is nchw4 layout'),
  98. Doc('NCHW4_NCHW = 11', 'NCHW4_NCHW means input tensors are nchw4 layout, output tensor is nchw layout'),
  99. Doc('NHWC_NCHW = 12', 'NHWC_NCHW means input tensors are nhwc layout, '
  100. 'output tensor is nchw layout'),
  101. Doc('NHWC_NCHW4_IC_SMALL = 13', 'NHWC_NCHW4_IC_SMALL means input tensors are nhwc(c < 4) layout, '
  102. 'output tensor is nchw4 layout, padding c=4'),
  103. Doc('NCHW_NCHW4_IC_SMALL = 14', 'NCHW_NCHW4_IC_SMALL means input tensors are nchw(c < 4) layout, '
  104. 'output tensor is nchw4 layout, padding c=4'),
  105. Doc('CHWN4 = 15', 'CHWN4 is currently only used on Nvidia platform for fast implementation '
  106. 'of convolution using CUDA/SASS. The channels are split into groups of 4 channels.'),
  107. Doc('NCHW64 = 16', 'NCHW64 is designed for convolution implementations to utilize TensorCore '
  108. 'instructions for 4-bit integers on Nvidia platforms'),
  109. Doc('NCHW4_NHWC = 17', 'NCHW4_NHWC means input tensors are nchw4 layout, output tensor is nhwc layout')).
  110. add_enum_alias('ComputeMode', 'ConvolutionV1',name_field='compute_mode')
  111. )
  112. (pdef('MaskPropagate').
  113. add_fields(
  114. 'uint32',
  115. Doc('pad_h', 'padding on one side on the first dimension'), 0,
  116. Doc('pad_w', 'padding on one side on the second dimension'), 0,
  117. Doc('stride_h', 'kernel stride on the first dimension'), 1,
  118. Doc('stride_w', 'kernel stride on the second dimension'), 1,
  119. Doc('kernel_h', 'kernel height'), 1,
  120. Doc('kernel_w', 'kernel width'), 1,
  121. Doc('dilate_h', 'dilate height'), 1,
  122. Doc('dilate_w', 'dilate width'), 1)
  123. )
  124. (pdef('ConvPooling').
  125. add_enum('Method', 'WITH_TEXTURE_OBJ = 0', 'WITH_SHARED_MEM = 1').
  126. add_enum_alias('ConvMode', 'ConvolutionV0', 'Mode').
  127. add_enum('PoolMode', 'AVERAGE = 0', 'MAX = 1').
  128. add_enum('NonlineMode', 'IDENTITY = 0', 'RELU = 1', 'SIGMOID = 2').
  129. add_fields('uint32', 'pool_shape_h', 1, 'pool_shape_w', 1, 'pool_stride_h', 1, 'pool_stride_w', 1, \
  130. 'pool_pad_h', 0, 'pool_pad_w', 0, 'conv_stride_h', 1, 'conv_stride_w', 1, 'conv_pad_h', 0, 'conv_pad_w', 0))
  131. (pdef('ConvBias', 'legacy conv_bias', version=0, is_legacy=True).
  132. add_enum('NonlineMode', 'IDENTITY = 0', 'RELU = 1', 'SIGMOID = 2', 'H_SWISH = 3').
  133. add_enum_alias('Mode', 'ConvolutionV0').
  134. add_fields('uint32', 'pad_h', 0, 'pad_w', 0, 'stride_h', 1, 'stride_w', 1))
  135. (pdef('ConvBias', 'active(conv(x, w) + bias)', version=1, is_legacy=True).
  136. add_enum_alias('NonlineMode', 'ConvBiasV0').
  137. add_enum_alias('Mode', 'ConvolutionV0').
  138. add_enum_alias('DataType', 'ConvolutionV0', name_field='data_type').
  139. add_enum_alias('Sparse', 'ConvolutionV0').
  140. add_enum_alias('Format', 'ConvolutionV0').
  141. add_fields(
  142. 'uint32',
  143. Doc('pad_h', 'padding on one side on the first dimension'), 0,
  144. Doc('pad_w', 'padding on one side on the second dimension'), 0,
  145. Doc('stride_h', 'kernel stride on the first dimension'), 1,
  146. Doc('stride_w', 'kernel stride on the second dimension'), 1,
  147. Doc('dilate_h', 'dilation (i.e. size of each zero-padded kernel block) '
  148. 'on the first dimension'), 1,
  149. Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) '
  150. 'on the second dimension'), 1)
  151. )
  152. (pdef('ConvBias', 'active(conv(x, w) + bias)', version=2, is_legacy=True).
  153. add_enum_alias('NonlineMode', 'ConvBiasV0').
  154. add_enum_alias('Mode', 'ConvolutionV0').
  155. add_enum_alias('Sparse', 'ConvolutionV0').
  156. add_enum_alias('Format', 'ConvolutionV0').
  157. add_fields(
  158. 'uint32',
  159. Doc('pad_h', 'padding on one side on the first dimension'), 0,
  160. Doc('pad_w', 'padding on one side on the second dimension'), 0,
  161. Doc('stride_h', 'kernel stride on the first dimension'), 1,
  162. Doc('stride_w', 'kernel stride on the second dimension'), 1,
  163. Doc('dilate_h', 'dilation (i.e. size of each zero-padded kernel block) '
  164. 'on the first dimension'), 1,
  165. Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) '
  166. 'on the second dimension'), 1).
  167. add_enum_alias('ComputeMode', 'ConvolutionV1', name_field='compute_mode')
  168. )
  169. (pdef('ConvBias', 'active(conv(x, w) + bias)', version=3, is_legacy=True).
  170. add_enum_alias('NonlineMode', 'ConvBiasV0').
  171. add_enum_alias('Mode', 'ConvolutionV0').
  172. add_enum_alias('Sparse', 'ConvolutionV0').
  173. add_enum_alias('Format', 'ConvolutionV0').
  174. add_fields(
  175. 'uint32',
  176. Doc('pad_h', 'padding on one side on the first dimension'), 0,
  177. Doc('pad_w', 'padding on one side on the second dimension'), 0,
  178. Doc('stride_h', 'kernel stride on the first dimension'), 1,
  179. Doc('stride_w', 'kernel stride on the second dimension'), 1,
  180. Doc('dilate_h', 'dilation (i.e. size of each zero-padded kernel block) '
  181. 'on the first dimension'), 1,
  182. Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) '
  183. 'on the second dimension'), 1,
  184. Doc('output_block_size', 'detailed meaning: see winograd in conv bias'), 0).
  185. add_enum_alias('ComputeMode', 'ConvolutionV1', name_field='compute_mode')
  186. )
  187. (pdef('ConvBias', 'active(conv(x, w) + bias)', version=4).
  188. add_enum_alias('NonlineMode', 'ConvBiasV0').
  189. add_enum_alias('Mode', 'ConvolutionV0').
  190. add_enum_alias('Sparse', 'ConvolutionV0').
  191. add_enum_alias('Format', 'Convolution').
  192. add_fields(
  193. 'uint32',
  194. Doc('pad_h', 'padding on one side on the first dimension'), 0,
  195. Doc('pad_w', 'padding on one side on the second dimension'), 0,
  196. Doc('stride_h', 'kernel stride on the first dimension'), 1,
  197. Doc('stride_w', 'kernel stride on the second dimension'), 1,
  198. Doc('dilate_h', 'dilation (i.e. size of each zero-padded kernel block) '
  199. 'on the first dimension'), 1,
  200. Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) '
  201. 'on the second dimension'), 1).
  202. add_enum_alias('ComputeMode', 'ConvolutionV1', name_field='compute_mode')
  203. )
  204. (pdef('SeparableConv').
  205. add_enum_alias('Mode', 'ConvolutionV0').
  206. add_enum('BorderMode', 'BORDER_REPLICATE = 0', 'BORDER_REFLECT = 1',
  207. 'BORDER_REFLECT_101 = 2','BORDER_WRAP = 3',
  208. 'BORDER_CONSTANT = 4', 'BORDER_TRANSPARENT = 5','BORDER_ISOLATED = 6').
  209. add_fields('bool', 'is_symm_kernel', 'true').
  210. add_fields('uint32', 'pad_h', 0, 'pad_w', 0, 'stride_h', 1, 'stride_w', 1,
  211. 'ksize_h', 3, 'ksize_w', 3, 'anchor_h', 1, 'anchor_w', 1))
  212. (pdef('Images2Neibs').
  213. add_fields('uint32', 'pad_h', 0, 'pad_w', 0, 'stride_h', 1, 'stride_w', 1,
  214. 'dilate_h', 1, 'dilate_w', 1, 'window_h', 3, 'window_w', 3))
  215. (pdef('SlidingWindowTranspose').
  216. add_fields('uint32', 'out_h', 0, 'out_w', 0, 'pad_h', 0, 'pad_w', 0, 'stride_h', 1, 'stride_w', 1,
  217. 'dilate_h', 1, 'dilate_w', 1, 'window_h', 3, 'window_w', 3))
  218. (pdef('Pooling', version=0, is_legacy=True).
  219. add_enum(
  220. 'Mode',
  221. Doc('MAX = 0', 'maximum value inside pooling window'),
  222. Doc('AVERAGE = 1',
  223. 'arithmetic mean of all values inside pooling window. Padding values '
  224. 'are taken into account and are viewed as zero'),
  225. Doc('AVERAGE_COUNT_EXCLUDE_PADDING = 2',
  226. 'arithmetic mean of all values inside pooling window. No padding is '
  227. 'used.')
  228. ).
  229. add_fields('uint32', 'pad_h', 0, 'pad_w', 0, 'stride_h', 2, 'stride_w', 2,
  230. 'window_h', 2, 'window_w', 2).
  231. add_enum_alias('Format', 'ConvolutionV0')
  232. )
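# --- Editorial sketch (not part of the original file) -----------------------
# Difference between the AVERAGE and AVERAGE_COUNT_EXCLUDE_PADDING pooling
# modes above, for a window that overlaps the zero padding. Plain NumPy,
# values chosen for illustration only.
import numpy as np

window = np.array([0.0, 0.0, 4.0, 6.0])   # two padding zeros + two real values
n_pad = 2
avg_include_pad = window.sum() / window.size            # AVERAGE: padding counted as zeros
avg_exclude_pad = window.sum() / (window.size - n_pad)  # AVERAGE_COUNT_EXCLUDE_PADDING
assert avg_include_pad == 2.5 and avg_exclude_pad == 5.0
# -----------------------------------------------------------------------------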
  233. (pdef('Pooling', version=1).
  234. add_enum_alias('Mode','PoolingV0').
  235. add_fields('uint32', 'pad_h', 0, 'pad_w', 0, 'stride_h', 2, 'stride_w', 2,
  236. 'window_h', 2, 'window_w', 2).
  237. add_enum_alias('Format', 'Convolution')
  238. )
  239. (pdef('AdaptivePooling', version=0, is_legacy=True).
  240. add_enum_alias('Mode', 'PoolingV0').
  241. add_enum_alias('Format', 'ConvolutionV0')
  242. )
  243. (pdef('AdaptivePooling', version=1).
  244. add_enum_alias('Mode', 'PoolingV0').
  245. add_enum_alias('Format', 'Convolution')
  246. )
  247. (pdef('LRN',
  248. 'see ImageNet Classification with Deep Convolutional Neural Networks for'
  249. ' meaning of the fields').
  250. add_fields('uint32', Doc('n', 'must be odd'), 5).
  251. add_fields('float32', 'k', '2.f', 'alpha', '1e-4f', 'beta', '0.75f')
  252. )
  253. (pdef('BN').
  254. add_enum(
  255. 'ParamDim',
  256. Doc('DIM_11HW = 0', 'Dim of params (Sigma, Mu) is 1 x 1 x H x W'),
  257. Doc('DIM_1CHW = 1', 'Dim of params (Sigma, Mu) is 1 x C x H x W'),
  258. Doc('DIM_1C11 = 2', 'Dim of params (Sigma, Mu) is 1 x C x 1 x 1'),
  259. Doc('DIM_111C = 3', 'Dim of params (Sigma, Mu) is 1 x 1 x 1 x C'),
  260. name_field='param_dim'
  261. ).
  262. add_enum(
  263. 'FwdMode',
  264. Doc('TRAINING = 0', 'Training phase.'),
  265. Doc('INFERENCE = 1', 'Inference phase.'),
  266. name_field='fwd_mode'
  267. ).
  268. add_fields('float64', 'epsilon', '1e-4f').
  269. add_fields('float64', 'avg_factor', '1.f').
  270. add_fields('float32', 'scale', '1.f').
  271. add_fields('float32', 'bias', '0.f')
  272. )
  273. (pdef('ROIPooling').
  274. add_enum(
  275. 'Mode',
  276. Doc('MAX = 0', 'maximum value inside pooling window; pooling result would '
  277. 'be 0 if pooling window is empty'),
  278. Doc('AVERAGE = 1',
  279. 'arithmetic mean of all values inside pooling window; pooling result '
  280. 'would be 0 if pooling window is empty')
  281. ).
  282. add_fields('float32', 'scale', '1.f'))
  283. INTERP_MODES = ['NEAREST = 0', 'LINEAR = 1', 'AREA = 2', 'CUBIC = 3', 'LANCZOS4 = 4']
  284. BORDER_MODES = [Doc('REPLICATE = 0', 'aaaaaa|abcdefgh|hhhhhhh'),
  285. Doc('REFLECT = 1', 'fedcba|abcdefgh|hgfedcb'),
  286. Doc('REFLECT_101 = 2', 'gfedcb|abcdefgh|gfedcba'),
  287. Doc('WRAP = 3', 'cdefgh|abcdefgh|abcdefg'),
  288. Doc('CONSTANT = 4', 'iiiiii|abcdefgh|iiiiiii'),
  289. Doc('TRANSPARENT = 5', ''),
  290. Doc('ISOLATED = 6', '')]
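# --- Editorial sketch (not part of the original file) -----------------------
# The border-mode patterns documented above, reproduced with numpy.pad on a
# 1-D sequence (1..8 standing for a..h). The mapping onto numpy pad modes is
# an editorial assumption used purely for illustration.
import numpy as np

src = np.arange(1, 9)
assert list(np.pad(src, 2, mode='edge'))      == [1, 1, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8]  # REPLICATE
assert list(np.pad(src, 2, mode='symmetric')) == [2, 1, 1, 2, 3, 4, 5, 6, 7, 8, 8, 7]  # REFLECT
assert list(np.pad(src, 2, mode='reflect'))   == [3, 2, 1, 2, 3, 4, 5, 6, 7, 8, 7, 6]  # REFLECT_101
assert list(np.pad(src, 2, mode='wrap'))      == [7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2]  # WRAP
assert list(np.pad(src, 2, mode='constant', constant_values=0)) \
    == [0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0]                                            # CONSTANT
# -----------------------------------------------------------------------------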
  291. (pdef('WarpPerspective', version=1, is_legacy=True).
  292. add_enum('InterpolationMode', *INTERP_MODES,
  293. name_field='imode', default=1,
  294. member_alias=[(i, 'INTER_{}'.format(i)) for i in INTERP_MODES]
  295. ).
  296. add_enum('BorderMode', *BORDER_MODES,
  297. name_field='bmode',
  298. member_alias=[(i, 'BORDER_{}'.format(i)) for i in BORDER_MODES]
  299. ).
  300. add_enum_alias('Format', 'ConvolutionV0').
  301. add_fields('float32', Doc('border_val', 'used for CONSTANT bmode'), '.0f'))
  302. (pdef('WarpPerspective', version=2).
  303. add_enum_alias('InterpolationMode','WarpPerspectiveV1',name_field="imode").
  304. add_enum_alias('BorderMode','WarpPerspectiveV1',name_field="bmode").
  305. add_enum_alias('Format', 'Convolution').
  306. add_fields('float32', Doc('border_val', 'used for CONSTANT bmode'), '.0f'))
  307. pdef('SpatialTfGridGenerator').add_enum('Mode', 'AFFINE = 0')
  308. pdef('SpatialTfSampler').add_enum('Mode', 'BILINEAR = 0')
  309. pdef('AddUpdate').add_fields(
  310. 'float32', 'alpha', '1.f', 'beta', '1.f', 'bias', '0.f')
  311. pdef('Elemwise').add_enum(
  312. 'Mode',
  313. Doc('RELU = 0', 'unary: max(x, 0)'),
  314. Doc('ABS = 1', 'unary: abs(x)'),
  315. Doc('ACOS = 2', 'unary: acos(x)'),
  316. Doc('ASIN = 3', 'unary: asin(x)'),
  317. Doc('CEIL = 4', 'unary: ceil(x)'),
  318. Doc('COS = 5', 'unary: cos(x)'),
  319. Doc('EXP = 6', 'unary: exp(x)'),
  320. Doc('EXPM1 = 7', 'unary: numerically stable exp(x)-1'),
  321. Doc('FLOOR = 8', 'unary: floor(x)'),
  322. Doc('LOG = 9', 'unary: natural logarithm, log(x)'),
  323. Doc('LOG1P = 10', 'unary: numerically stable log(x+1)'),
  324. Doc('NEGATE = 11', 'unary: -x'),
  325. Doc('SIGMOID = 12', 'unary: 1/(1+exp(-x))'),
  326. Doc('SIN = 13', 'unary: sin(x)'),
  327. Doc('TANH = 14', 'unary: tanh(x)'),
  328. Doc('ABS_GRAD = 15', 'binary: x > 0 ? y : -y'),
  329. Doc('ADD = 16', 'binary: x + y'),
  330. Doc('FLOOR_DIV = 17', 'binary: floor(x / y)'),
  331. Doc('MAX = 18', 'binary: max(x, y)'),
  332. Doc('MIN = 19', 'binary: min(x, y)'),
  333. Doc('MOD = 20', 'binary: x % y or fmodf(x, y)'),
  334. Doc('MUL = 21', 'binary: x * y'),
  335. Doc('POW = 22', 'binary: pow(x, y)'),
  336. Doc('SIGMOID_GRAD = 23', 'binary: x * (1 - x) * y'),
  337. Doc('SUB = 24', 'binary: x - y'),
  338. Doc('SWITCH_GT0 = 25', 'binary: (x > 0) * y'),
  339. Doc('TANH_GRAD = 26', 'binary: (1 - x * x) * y'),
  340. Doc('TRUE_DIV = 27', 'binary: x / y'),
  341. Doc('LOG_SUM_EXP = 28', 'binary: numerically stable log(exp(x) + exp(y))'),
  342. Doc('LT = 29', 'binary: x < y'),
  343. Doc('LEQ = 30', 'binary: x <= y'),
  344. Doc('EQ = 31', 'binary: x == y'),
  345. Doc('SHL = 32', 'bitwise binary: x << y. '
  346. 'Note that result is undefined if y < 0 or y >= bitwidth. Logical '
  347. 'shift is performed for unsigned integers, and arithmetic shift for '
  348. 'signed ones.'),
  349. Doc('SHR = 33', 'bitwise binary: x >> y; see SHL mode for more details'),
  350. Doc('COND_LEQ_MOV = 34', 'ternary: x <= y ? z : 0'),
  351. Doc('FUSE_MUL_ADD3 = 35',
  352. 'compute ``a * b + c`` where c must either have same layout as '
  353. 'a or b, or be a scalar'),
  354. Doc('FUSE_MUL_ADD4 = 36',
  355. 'compute ``a * A + b * B`` where a and b must have equal layout, '
  356. 'and A and B must have equal layout. The inputs ``b`` and ``B`` '
  357. 'can be swapped'),
  358. Doc('FUSE_ADD_RELU = 37', 'binary: max(x+y, 0)'),
  359. Doc('FUSE_ADD_SIGMOID = 38', 'binary: 1/(1+exp(-(x+y)))'),
  360. Doc('FUSE_ADD_TANH = 39', 'binary: tanh(x+y)'),
  361. Doc('FAST_TANH = 40', 'unary: rational approximation of tanh(x)'),
  362. Doc('FAST_TANH_GRAD = 41', 'binary: grad of the rational approximation of tanh(x)'),
  363. Doc('ROUND = 42', 'unary: round(x), the nearest integer value to x, rounding '
  364. 'halfway cases away from zero. Float only.'),
  365. Doc('RMULH = 43', 'binary: rounded higher l bits of x * y, where l is the bit '
  366. 'length of x.'),
  367. Doc('ATAN2 = 44','binary: atan2(y,x)'),
  368. Doc('ERF = 45', 'unary: erf(x)'),
  369. Doc('ERFINV = 46', 'unary: inverse function of erf(x)'),
  370. Doc('ERFC = 47', 'unary: erfc(x)'),
  371. Doc('ERFCINV = 48', 'unary: inverse function of erfc(x)'),
  372. Doc('H_SWISH = 49', 'unary: x * clip(x + 3, 0, 6) / 6'),
  373. Doc('H_SWISH_GRAD = 50', 'binary: x < -3 ? 0 : (x > 3 ? y : (2 * x + 3) / 6 * y)'),
  374. Doc('FUSE_ADD_H_SWISH = 51', 'binary: hswish(x+y)'),
  375. Doc('NOT = 52', 'unary: !x'),
  376. Doc('AND = 53', 'binary: x && y'),
  377. Doc('OR = 54', 'binary: x || y'),
  378. Doc('XOR = 55', 'binary: x ^ y'),
  379. Doc('SILU = 56', 'unary: x / (1 + exp(-x))'),
  380. Doc('SILU_GRAD = 57', 'binary: grad(x / (1 + exp(-x)))'),
  381. Doc('GELU = 58', 'unary: x Phi(x)'),
  382. Doc('GELU_GRAD = 59', 'binary: grad(x Phi(x))'),
  383. )
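# --- Editorial sketch (not part of the original file) -----------------------
# A few of the Elemwise modes above written out as NumPy expressions, to make
# the one-line formulas concrete. Variable names are illustrative.
import numpy as np

x = np.linspace(-4.0, 4.0, 9)
y = np.full_like(x, 0.5)
h_swish       = x * np.clip(x + 3.0, 0.0, 6.0) / 6.0   # H_SWISH
fuse_add_relu = np.maximum(x + y, 0.0)                  # FUSE_ADD_RELU
sigmoid_grad  = x * (1.0 - x) * y                       # SIGMOID_GRAD
fuse_mul_add3 = x * y + 1.0                             # FUSE_MUL_ADD3 (c as a scalar)
assert h_swish[4] == 0.0 and fuse_add_relu[0] == 0.0
# -----------------------------------------------------------------------------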
  384. pdef('ElemwiseMultiType').add_enum(
  385. 'Mode',
  386. Doc('FUSE_MUL_ADD3_INT16x32x32x32 = 0',
  387. 'compute ``a * b + c`` requiring that ``a`` be int16 and ``b`` and '
  388. '``c`` int32, and the result is int32. This mode is optimized for '
  389. 'the channel-broadcasted case, i.e. ``a`` has shape (A, B, C) and '
  390. '``b`` and ``c`` have shape (1, C, 1)'),
  391. Doc('FUSE_MUL_ADD3_IXxF32xF32xI8 = 1',
  392. 'compute ``a * b + c`` where the input ``a`` is an integer type '
  393. '``b`` and ``c`` are both ``float32``, the result is '
  394. '``int8``. This is currently only optimized for ``(1, x)`` '
  395. 'broadcast for ``b`` and ``c``. Computation is carried in floating '
  396. 'points and results are rounded towards zero with saturated cast to '
  397. 'int.'),
  398. Doc('ROUND_SHR_SATURATE_IXxI8xI8 = 2',
  399. 'Compute ``a >> b``, round the result according to lower ``b`` bits '
  400. 'of ``a`` and make a saturating conversion to int8, where ``a`` should'
  401. ' be an integer tensor and ``b`` should be an int8 scalar.'),
  402. Doc('FUSE_ADD_RMULH_ROUND_SHR_SATURATE_INT16x16x16x8 = 3',
  403. 'Fused operation of an int16 elemwise add, an int16 rounding multiply '
  404. 'high and an int16 to int8 rounding right shift with saturation.'),
  405. Doc('FUSE_ADD_RMULH_ROUND_SHR_SATURATE_INT32x32x32x8 = 4',
  406. 'Fused operation of an int32 elemwise add, an int32 rounding multiply '
  407. 'high and an int32 to int8 rounding right shift with saturation.'),
  408. Doc('ROUND_SHR_SATURATE_IXxI8xI16 = 5',
  409. 'Compute ``a >> b``, round the result according to lower ``b`` bits of '
  410. '``a`` and make a saturating conversion to int16, where ``a`` should'
  411. ' be an integer tensor and ``b`` should be an int8 scalar.'),
  412. Doc('QADD = 6', 'Fused elemwise add two quantized int8 with specified '
  413. 'output quantized dtype'),
  414. Doc('QFUSE_ADD_RELU = 7', 'Fused elemwise add two quantized int8 followed'
  415. ' by ReLU and typecvt to specified dtype'),
  416. Doc('QMUL = 8', 'Fused elemwise multiply two quantized int8 with specified '
  417. 'output quantized dtype'),
  418. Doc('QMIN = 9', 'Fused elemwise min two quantized int8 with specified '
  419. 'output quantized dtype'),
  420. Doc('QMAX = 10', 'quantized: max(x, y), with specified output quantized dtype'),
  421. Doc('QSUB = 11', 'quantized: x - y'),
  422. Doc('QTRUE_DIV = 12', 'quantized: x / y'),
  423. Doc('QFUSE_ADD_SIGMOID = 13', 'quantized: sigmoid(x + y)'),
  424. Doc('QFUSE_ADD_TANH = 14', 'quantized: tanh(x + y)'),
  425. Doc('QRELU = 15', 'quantized: x > 0 ? x : 0'),
  426. Doc('QABS = 16', 'quantized: x > 0 ? x : -x'),
  427. Doc('QSIGMOID = 17', 'quantized: sigmoid(x)'),
  428. Doc('QEXP = 18', 'quantized: exp(x)'),
  429. Doc('QTANH = 19', 'quantized: tanh(x)'),
  430. Doc('QFUSE_MUL_ADD3 = 20', 'quantized: x * y + z'),
  431. Doc('QFAST_TANH = 21', 'quantized: fast_tanh(x)'),
  432. Doc('QNEGATE = 22', 'quantized: -x'),
  433. Doc('QACOS = 23', 'quantized: acos(x)'),
  434. Doc('QASIN = 24', 'quantized: asin(x)'),
  435. Doc('QCEIL = 25', 'quantized: ceil(x)'),
  436. Doc('QCOS = 26', 'quantized: cos(x)'),
  437. Doc('QEXPM1 = 27', 'quantized: expm1(x)'),
  438. Doc('QFLOOR = 28', 'quantized: floor(x)'),
  439. Doc('QLOG = 29', 'quantized: log(x)'),
  440. Doc('QLOG1P = 30', 'quantized: log1p(x)'),
  441. Doc('QSIN = 31', 'quantized: sin(x)'),
  442. Doc('QROUND = 32', 'quantized: round(x)'),
  443. Doc('QERF = 33', 'quantized: erf(x)'),
  444. Doc('QERFINV = 34', 'quantized: erfinv(x)'),
  445. Doc('QERFC = 35', 'quantized: erfc(x)'),
  446. Doc('QERFCINV = 36', 'quantized: erfcinv(x)'),
  447. Doc('QABS_GRAD = 37', 'quantized: abs_grad'),
  448. Doc('QFLOOR_DIV = 38', 'quantized floor_div'),
  449. Doc('QMOD = 39', 'quantized mod'),
  450. Doc('QSIGMOID_GRAD = 40', 'quantized sigmoid_grad'),
  451. Doc('QSWITCH_GT0 = 41', 'quantized switch_gt0'),
  452. Doc('QTANH_GRAD = 42', 'quantized tanh_grad'),
  453. Doc('QLT = 43', 'quantized lt'),
  454. Doc('QLEQ = 44', 'quantized leq'),
  455. Doc('QEQ = 45', 'quantized eq'),
  456. Doc('QPOW = 46', 'quantized pow'),
  457. Doc('QLOG_SUM_EXP = 47', 'quantized log_sum_exp'),
  458. Doc('QFAST_TANH_GRAD = 48', 'quantized fast_tanh_grad'),
  459. Doc('QATAN2 = 49', 'quantized atan2'),
  460. Doc('QCOND_LEQ_MOV = 50', 'quantized cond_leq_mov'),
  461. Doc('QH_SWISH = 51', 'quantized h_swish'),
  462. Doc('QFUSE_ADD_H_SWISH = 52', 'quantized h_swish(x+y)'),
  463. Doc('QH_SWISH_GRAD = 53', 'quantized h_swish_grad')
  464. )
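# --- Editorial sketch (not part of the original file) -----------------------
# A plain-NumPy reading of ROUND_SHR_SATURATE_IXxI8xI8: shift right by b,
# round on the discarded low bits, then saturate to int8. The exact rounding
# convention for negative values is an assumption here, not taken from the
# MegEngine kernels.
import numpy as np

def round_shr_saturate_i8(a, b):
    a = np.asarray(a, dtype=np.int32)
    rounded = (a + (1 << (b - 1))) >> b          # round-to-nearest via the discarded bits
    return np.clip(rounded, -128, 127).astype(np.int8)

assert list(round_shr_saturate_i8([100, 1000, -1000], 2)) == [25, 127, -128]
# -----------------------------------------------------------------------------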
  465. pdef('PowC', 'power with constant exponent').add_fields('float32', 'exp', 0)
  466. (pdef('DctChannelSelect', '2d discrete cosine transform', version=0, is_legacy=True).add_enum_alias('Format', 'ConvolutionV0').
  467. add_enum('FastImpl', 'NONE = 0', 'FIX_32_MASK = 1').add_fields('int32', 'dct_block_size', 8))
  468. (pdef('DctChannelSelect', '2d discrete cosine transform', version=1).add_enum_alias('Format', 'Convolution').
  469. add_enum_alias('FastImpl', 'DctChannelSelectV0').add_fields('int32', 'dct_block_size', 8))
  470. (pdef('MatrixMul', version=0, is_legacy=True).
  471. add_fields('bool', 'transposeA', 'false', 'transposeB', 'false').
  472. add_enum('DataType',
  473. Doc('FLOAT = 0', 'input/output both float32/float16'),
  474. 'INT8x8x16 = 1',
  475. 'INT8x8x32 = 2',
  476. Doc('FLOAT_IO16xC32 = 3', 'input/output both float16, the internal compute is '
  477. 'float32'),
  478. Doc('QUINT8x8x32 = 4', 'input QuantizedAsymm8, output QuantizedS32'),
  479. Doc('QUINT4x4x32 = 5', 'input QuantizedAsymm4, output QuantizedS32'),
  480. name_field='data_type'))
  481. (pdef('MatrixMul', version=1, is_legacy=True).
  482. add_fields('bool', 'transposeA', 'false', 'transposeB', 'false').
  483. add_enum(Doc('ComputeMode', 'Specifies special computation modes, e.g. '
  484. 'different combinations of intermediate result '
  485. 'data types.'),
  486. Doc('DEFAULT = 0', 'No special requirements on the precision of '
  487. 'intermediate results.'),
  488. Doc('FLOAT32 = 1', 'Use Float32 accumulator and intermediate result. '
  489. 'Only supported when input and output is Float16.'),
  490. name_field='compute_mode'))
  491. (pdef('MatrixMul', version=2).
  492. add_fields('bool', 'transposeA', 'false', 'transposeB', 'false').
  493. add_enum_alias('ComputeMode', 'MatrixMulV1', name_field='compute_mode').
  494. add_enum('Format',
  495. Doc('DEFAULT = 0', 'Normal matrix mul: (M, K) x (K, N) = (M, N)'),
  496. Doc('MK4 = 1', 'Split 4 from M and K, better for neon compute:'
  497. '(M/4, K/4, 4(k), 4(m)) x (K/4, N, 4(k)). if transposeA the '
  498. 'layout is (K/4, M/4, 4(k), 4(m)) x (K/4, N, 4(k))'),
  499. Doc('MK8 = 2', 'Split 8 from M and K, better for neon compute:'
  500. '(M/8, K/8, 8(k), 8(m)) x (K/8, N, 8(k)). if transposeA the '
  501. 'layout is (K/8, M/8, 8(k), 8(m)) x (K/8, N, 8(k))'),
  502. Doc('MK4_DOT = 3', 'Split 4 from M and K, better for neon dotprod:'
  503. 'M/4, K/4, 4(m), 4(k)) x (K/4, N, 4(k)). if transposeA the '
  504. 'layout is (K/4, M/4, 4(m), 4(k)) x (K/4, N, 4(k))'))
  505. )
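# --- Editorial sketch (not part of the original file) -----------------------
# What the MK4 MatrixMul format above means for the operands, checked against
# a plain matmul with NumPy. Only the operand layouts and the contraction are
# shown; the docstring does not spell out the output layout, so none is assumed.
import numpy as np

M, K, N = 8, 12, 5
A = np.random.rand(M, K).astype(np.float32)
B = np.random.rand(K, N).astype(np.float32)
# A: (M, K) -> (M/4, K/4, 4(k), 4(m));  B: (K, N) -> (K/4, N, 4(k))
A_mk4 = A.reshape(M // 4, 4, K // 4, 4).transpose(0, 2, 3, 1)
B_mk4 = B.reshape(K // 4, 4, N).transpose(0, 2, 1)
# contract over the K/4 and 4(k) axes and fold (M/4, 4(m)) back into M
C = np.einsum('akcb,knc->abn', A_mk4, B_mk4).reshape(M, N)
np.testing.assert_allclose(C, A @ B, rtol=1e-5)
# -----------------------------------------------------------------------------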
  506. (pdef('SVD').
  507. add_fields('bool',
  508. Doc('full_matrices',
  509. 'Whether to compute the full-sized u and v or only the leading'
  510. ' min(m, n) singular vectors. Ignored if compute_uv is '
  511. 'false.'),
  512. 'false',
  513. Doc('compute_uv',
  514. 'Whether the left (u) and right (v) singular vectors will be '
  515. 'computed and outputted.'),
  516. 'true'))
  517. (pdef('Reduce', 'legacy reduce', version=0, is_legacy=True).
  518. add_enum('Mode',
  519. 'SUM = 0',
  520. Doc('SUM_SQR = 1', 'sum of x * x for each element x'),
  521. 'PRODUCT = 2', 'MIN = 3', 'MAX = 4').
  522. add_fields('int32',
  523. Doc('axis',
  524. 'axis along which reduction is performed; if -1 is given, '
  525. 'reduce to given target shape (only used in megbrain)'),
  526. -1))
  527. (pdef('Reduce', 'reduce along given axis', version=1, is_legacy=True).
  528. add_enum('Mode',
  529. 'SUM = 0',
  530. Doc('SUM_SQR = 1', 'sum of x * x for each element x'),
  531. 'PRODUCT = 2', 'MIN = 3', 'MAX = 4', 'MEAN = 5').
  532. add_fields('int32',
  533. Doc('axis',
  534. 'axis along which reduction is performed; if -1 is given, '
  535. 'reduce to given target shape (only used in megbrain)'),
  536. -1).
  537. add_enum('DataType',
  538. Doc('DEFAULT = 0',
  539. '''
  540. input/output are the same data type, and the internal computation type would be chosen by the input/output dtypes and the reduction mode.
  541. Currently, ``DEFAULT`` mode means:
  542. +--------------------+-----------------------------------+-------------------+
  543. | Input/Output DType | Mode | Computation DType |
  544. +====================+===================================+===================+
  545. | FLOAT32 | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT | FLOAT32 |
  546. +--------------------+-----------------------------------+-------------------+
  547. | FLOAT16 | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT | FLOAT16 |
  548. +--------------------+-----------------------------------+-------------------+
  549. | INT32 | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT | INT32 |
  550. +--------------------+-----------------------------------+-------------------+
  551. | INT8 | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT | INT8 |
  552. +--------------------+-----------------------------------+-------------------+
  553. | QuantizedS8 | MIN/MAX | QuantizedS8 |
  554. +--------------------+-----------------------------------+-------------------+
  555. | QuantizedS8 | MEAN/SUM | QuantizedS32 |
  556. +--------------------+-----------------------------------+-------------------+
  557. | Quantized8Asymm | MIN/MAX | Quantized8Asymm |
  558. +--------------------+-----------------------------------+-------------------+
  559. | Quantized8Asymm | MEAN/SUM | QuantizedS32 |
  560. +--------------------+-----------------------------------+-------------------+
  561. '''
  562. ),
  563. Doc('FLOAT_IO16xC32 = 1', 'Deprecated. This was replaced by '
  564. 'FLOAT_O16xC32, and the input\'s dtype is decided by the actual input tensor.'),
  565. Doc('FLOAT_O32xC32 = 2', 'compute/output both are float32'),
  566. Doc('FLOAT_O16xC32 = 3', 'compute are float32, output float16'),
  567. Doc('QUINT_I8xO32 = 4', 'input quint8, compute and output are qint32'),
  568. Doc('QINT_I8xO32 = 5', 'input qint8, compute and output are qint32'),
  569. name_field='data_type'))
  570. (pdef('Reduce', 'reduce along given axis', version=2).
  571. add_enum('Mode',
  572. 'SUM = 0',
  573. Doc('SUM_SQR = 1', 'sum of x * x for each element x'),
  574. 'PRODUCT = 2', 'MIN = 3', 'MAX = 4', 'MEAN = 5').
  575. add_fields('int32',
  576. Doc('axis',
  577. 'axis along which reduction is performed; if INT_MAX is given, '
  578. 'reduce to given target shape (only used in megbrain)'),
  579. (1<<31)-1).
  580. add_enum('DataType',
  581. Doc('DEFAULT = 0',
  582. '''
  583. input/output are the same data type, and the internal computation type would be chosen by the input/output dtypes and the reduction mode.
  584. Currently, ``DEFAULT`` mode means:
  585. +--------------------+-----------------------------------+-------------------+
  586. | Input/Output DType | Mode | Computation DType |
  587. +====================+===================================+===================+
  588. | FLOAT32 | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT | FLOAT32 |
  589. +--------------------+-----------------------------------+-------------------+
  590. | FLOAT16 | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT | FLOAT16 |
  591. +--------------------+-----------------------------------+-------------------+
  592. | INT32 | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT | INT32 |
  593. +--------------------+-----------------------------------+-------------------+
  594. | INT8 | MIN/MAX/MEAN/SUM/SUM_SQR/PRODUCT | INT8 |
  595. +--------------------+-----------------------------------+-------------------+
  596. | QuantizedS8 | MIN/MAX | QuantizedS8 |
  597. +--------------------+-----------------------------------+-------------------+
  598. | QuantizedS8 | MEAN/SUM | QuantizedS32 |
  599. +--------------------+-----------------------------------+-------------------+
  600. | Quantized8Asymm | MIN/MAX | Quantized8Asymm |
  601. +--------------------+-----------------------------------+-------------------+
  602. | Quantized8Asymm | MEAN/SUM | QuantizedS32 |
  603. +--------------------+-----------------------------------+-------------------+
  604. '''
  605. ),
  606. Doc('FLOAT_IO16xC32 = 1', 'Deprecated. This was replaced by '
  607. 'FLOAT_O16xC32, and the input\'s dtype is decided by the actual input tensor.'),
  608. Doc('FLOAT_O32xC32 = 2', 'compute/output both are float32'),
  609. Doc('FLOAT_O16xC32 = 3', 'compute are float32, output float16'),
  610. Doc('QUINT_I8xO32 = 4', 'input quint8, compute and output are qint32'),
  611. Doc('QINT_I8xO32 = 5', 'input qint8, compute and output are qint32'),
  612. name_field='data_type'))
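# --- Editorial sketch (not part of the original file) -----------------------
# The reduce modes above on a small tensor, with SUM_SQR spelled out since its
# name is the least self-explanatory. Plain NumPy, axis chosen arbitrarily.
import numpy as np

x = np.array([[1.0, 2.0, 3.0],
              [4.0, 5.0, 6.0]])
assert list(x.sum(axis=1))       == [6.0, 15.0]   # SUM
assert list((x * x).sum(axis=1)) == [14.0, 77.0]  # SUM_SQR: sum of x * x
assert list(x.prod(axis=1))      == [6.0, 120.0]  # PRODUCT
assert list(x.mean(axis=1))      == [2.0, 5.0]    # MEAN
# -----------------------------------------------------------------------------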
  613. (pdef('Cumsum', 'calculate accumulated sum along given axis', version=0, is_legacy=True).
  614. add_fields('int32',
  615. Doc('axis',
  616. 'axis along which cumsum is performed'),
  617. -1).
  618. add_fields('bool',
  619. Doc('exclusive',
  620. 'whether the current element is taken into account'),
  621. 'true').
  622. add_fields('bool',
  623. Doc('reverse',
  624. 'whether the cumsum is forward or backward'),
  625. 'false'))
  626. (pdef('Cumsum', 'calculate accumulated sum along given axis', version=1).
  627. add_fields('int32',
  628. Doc('axis',
  629. 'axis along which cumsum is performed, default with INT_MAX'),
  630. (1<<31)-1).
  631. add_fields('bool',
  632. Doc('exclusive',
  633. 'whether the current element is taken into account'),
  634. 'true').
  635. add_fields('bool',
  636. Doc('reverse',
  637. 'whether the cumsum is forward or backward'),
  638. 'false'))
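# --- Editorial sketch (not part of the original file) -----------------------
# How the exclusive and reverse flags above change a cumulative sum, assuming
# exclusive=true means the current element is not counted. Plain NumPy.
import numpy as np

x = np.array([1, 2, 3, 4])
inclusive = np.cumsum(x)                              # exclusive=false
exclusive = np.concatenate(([0], np.cumsum(x)[:-1]))  # exclusive=true
reverse_inclusive = np.cumsum(x[::-1])[::-1]          # reverse=true
assert list(inclusive) == [1, 3, 6, 10]
assert list(exclusive) == [0, 1, 3, 6]
assert list(reverse_inclusive) == [10, 9, 7, 4]
# -----------------------------------------------------------------------------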
  639. (pdef('CondTake').
  640. add_enum('Mode',
  641. Doc('EQ = 0', 'take if ``abs(data-val)<eps``'),
  642. Doc('NEQ = 1', 'take if ``abs(data-val)>=eps``'),
  643. Doc('LT = 2', 'take if ``data<val``'),
  644. Doc('LEQ = 3', 'take if ``data<=val``'),
  645. Doc('GT = 4', 'take if ``data>val``'),
  646. Doc('GEQ = 5', 'take if ``data>=val``')).
  647. add_fields('float32',
  648. Doc('val', 'the value to be compared with; note that for integer '
  649. 'data, val is also converted to int'), 0).
  650. add_fields('float32', Doc('eps', 'used for float equality comparison'),
  651. 1e-6))
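# --- Editorial sketch (not part of the original file) -----------------------
# The CondTake EQ mode as documented above, written as a NumPy boolean mask
# plus flat indices. Values are illustrative.
import numpy as np

data = np.array([0.1, 0.5, 0.5000001, 2.0])
val, eps = 0.5, 1e-6
mask = np.abs(data - val) < eps          # EQ: take if abs(data - val) < eps
taken, indices = data[mask], np.flatnonzero(mask)
assert list(indices) == [1, 2] and taken.size == 2
# -----------------------------------------------------------------------------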
  652. pdef('Argsort').add_enum('Order', 'ASCENDING = 0', 'DESCENDING = 1')
  653. (pdef('IndexingRemap').
  654. add_fields('bool',
  655. Doc('is_non_overlapping',
  656. 'Whether no two dst elements map to the same src element. '
  657. 'Enabling this option can accelerate the gradient operator since'
  658. ' atomic adding operations could be avoided.'),
  659. 'false'))
  660. pdef('Sleep').add_fields('float32', Doc('time', 'time to sleep in seconds'), 0)
  661. (pdef('Linspace').
  662. add_fields('bool',
  663. Doc('endpoint',
  664. 'Whether stop is included in the generated tensor'),
  665. 'true'))
  666. (pdef('LinspaceFull').
  667. add_fields('float64',
  668. Doc('start', 'The first val.'),
  669. 0).
  670. add_fields('float64',
  671. Doc('stop', 'The last val.'),
  672. 1).
  673. add_fields('bool',
  674. Doc('endpoint',
  675. 'Whether stop is included in the generated tensor'),
  676. 'true'))
  677. (pdef('Eye').
  678. add_fields(
  679. 'int32',
  680. Doc('k', 'Index of the diagonal: 0 (the default) refers to the main '
  681. 'diagonal, a positive value refers to an upper diagonal, and a '
  682. 'negative value to a lower diagonal.'),
  683. 0).
  684. add_fields(
  685. 'dtype', Doc('dtype', 'data type of output value'),
  686. 'DTypeEnum::Float32'))
  687. (pdef('UniformRNG', version=0, is_legacy=True).
  688. add_fields('uint64', 'seed', 0))
  689. (pdef('UniformRNG', version=1).
  690. add_fields('uint64', 'seed', 0).
  691. add_fields(
  692. 'dtype', Doc('dtype', 'The dtype of output Tensor. Only support Float32.'),
  693. 'DTypeEnum::Float32'))
  694. (pdef('GaussianRNG', version=0, is_legacy=True).
  695. add_fields('uint64', 'seed', 0).
  696. add_fields('float32', 'mean', 0, 'std', 1))
  697. (pdef('GaussianRNG', version=1).
  698. add_fields('uint64', 'seed', 0).
  699. add_fields('float32', 'mean', 0, 'std', 1).
  700. add_fields(
  701. 'dtype', Doc('dtype', 'The dtype of output Tensor. Only support Float32.'),
  702. 'DTypeEnum::Float32'))
  703. (pdef('GammaRNG').
  704. add_fields('uint64', 'seed', 0))
  705. (pdef('BetaRNG').
  706. add_fields('uint64', 'seed', 0))
  707. (pdef('PoissonRNG').
  708. add_fields('uint64', 'seed', 0))
  709. (pdef('PermutationRNG').
  710. add_fields('uint64', 'seed', 0).
  711. add_fields(
  712. 'dtype', Doc('dtype', 'The dtype of output Tensor. Int32, Int16 and '
  713. 'Float32 are supported.'),
  714. 'DTypeEnum::Int32'))
  715. (pdef('ShuffleRNG').
  716. add_fields('uint64', 'seed', 0))
  717. (pdef('Flip').
  718. add_fields('bool', 'vertical', 'false', 'horizontal', 'false'))
  719. (pdef('Rotate')
  720. .add_fields('bool', 'clockwise', 'true'))
  721. (pdef('ROICopy')
  722. .add_fields('uint32', 'row_from', 0, 'row_to', 0, 'col_from', 0, 'col_to', 0))
  723. (pdef('CvtColor')
  724. .add_enum('Mode', 'RGB2GRAY = 0', 'RGB2YUV = 1', 'YUV2RGB = 2', 'GRAY2RGB = 3', 'RGBA2RGB = 4',
  725. 'RGBA2BGR = 5', 'RGBA2GRAY = 6', 'RGB2BGR = 7', 'BGR2GRAY = 8', 'BGR2RGB = 9',
  726. Doc('YUV2GRAY_NV21 = 10', 'For historical reasons, referred to as YCC by opencv'),
  727. 'YUV2RGB_NV21 = 11', 'YUV2BGR_NV21 = 12', 'YUV2GRAY_NV12 = 13', 'YUV2RGB_NV12 = 14',
  728. 'YUV2BGR_NV12 = 15', 'YUV2GRAY_YV12 = 16', 'YUV2RGB_YV12 = 17', 'YUV2BGR_YV12 = 18',
  729. 'YUV2GRAY_YU12 = 19', 'YUV2RGB_YU12 = 20', 'YUV2BGR_YU12 = 21',
  730. 'YCrCb2RGB = 22', 'YCrCb2BGR = 23',
  731. Doc('BT601_YUV2RGB_NV21 = 24', 'BT601 yuv format, referred to as YUV by opencv'),
  732. 'BT601_YUV2BGR_NV21 = 25', 'BT601_YUV2RGB_NV12 = 26', 'BT601_YUV2BGR_NV12 = 27',
  733. 'BT601_YUV2RGB_YV12 = 28', 'BT601_YUV2BGR_YV12 = 29', 'BT601_YUV2RGB_YU12 = 30',
  734. 'BT601_YUV2BGR_YU12 = 31',
  735. member_alias=[('YUV2GRAY_NV21', 'BT601_YUV2GRAY_NV21'),
  736. ('YUV2GRAY_NV12', 'BT601_YUV2GRAY_NV12'),
  737. ('YUV2GRAY_YV12', 'BT601_YUV2GRAY_YV12'),
  738. ('YUV2GRAY_YU12', 'BT601_YUV2GRAY_YU12')],
  739. name_field = 'mode'))
  740. (pdef('WarpAffine', version=0, is_legacy=True)
  741. .add_enum_alias('InterpolationMode', 'WarpPerspectiveV1', name_field='imode')
  742. .add_enum_alias('BorderMode', 'WarpPerspectiveV1', name_field='border_mode')
  743. .add_fields('float32', Doc('border_val', 'used for CONSTANT bmode'), '.0f'))
  744. (pdef('WarpAffine', version=1, is_legacy=True)
  745. .add_enum_alias('InterpolationMode', 'WarpPerspectiveV1', name_field='imode')
  746. .add_enum_alias('BorderMode', 'WarpPerspectiveV1', name_field='border_mode')
  747. .add_fields('float32', Doc('border_val', 'used for CONSTANT bmode'), '.0f')
  748. .add_enum_alias('Format', 'ConvolutionV0', default=1))
  749. (pdef('WarpAffine', version=2)
  750. .add_enum_alias('InterpolationMode', 'WarpPerspectiveV1', name_field='imode')
  751. .add_enum_alias('BorderMode', 'WarpPerspectiveV1', name_field='border_mode')
  752. .add_fields('float32', Doc('border_val', 'used for CONSTANT bmode'), '.0f')
  753. .add_enum_alias('Format', 'Convolution', default=1))
  754. (pdef('GaussianBlur')
  755. .add_enum_alias('BorderMode', 'WarpPerspectiveV1', name_field='border_mode')
  756. .add_fields('uint32', 'kernel_height', 0, 'kernel_width', 0)
  757. .add_fields('float32','sigma_x', '0.f', 'sigma_y', '0.f'))
  758. (pdef('Resize', version=0, is_legacy=True)
  759. .add_enum_alias('InterpolationMode', 'WarpPerspectiveV1', name_field='imode'))
  760. (pdef('Resize', version=1, is_legacy=True)
  761. .add_enum_alias('InterpolationMode', 'WarpPerspectiveV1', name_field='imode')
  762. .add_enum_alias('Format', 'ConvolutionV0', default=1))
  763. (pdef('Resize', version=2)
  764. .add_enum_alias('InterpolationMode', 'WarpPerspectiveV1', name_field='imode')
  765. .add_enum_alias('Format', 'Convolution', default=1))
  766. (pdef('Remap', version=0,is_legacy=True)
  767. .add_enum_alias('InterpolationMode', 'WarpPerspectiveV1', name_field='imode')
  768. .add_enum_alias('BorderMode', 'WarpPerspectiveV1', name_field='border_type')
  769. .add_enum_alias('Format', 'ConvolutionV0', default=1)
  770. .add_fields('float32', 'scalar', '0.f'))
  771. (pdef('Remap', version=1)
  772. .add_enum_alias('InterpolationMode', 'WarpPerspectiveV1', name_field='imode')
  773. .add_enum_alias('BorderMode', 'WarpPerspectiveV1', name_field='border_type')
  774. .add_enum_alias('Format', 'Convolution', default=1)
  775. .add_fields('float32', 'scalar', '0.f'))
  776. (pdef('Convolution3D').
  777. add_enum('Mode', 'CROSS_CORRELATION = 0', 'CONVOLUTION = 1').
  778. add_fields(
  779. 'uint32',
  780. Doc('pad_d', 'padding on one side on the first dimension'), 0,
  781. Doc('pad_h', 'padding on one side on the second dimension'), 0,
  782. Doc('pad_w', 'padding on one side on the third dimension'), 0,
  783. Doc('stride_d', 'kernel stride on the first dimension'), 1,
  784. Doc('stride_h', 'kernel stride on the second dimension'), 1,
  785. Doc('stride_w', 'kernel stride on the third dimension'), 1,
  786. Doc('dilate_d', 'dilation (i.e. size of each zero-padded kernel block) '
  787. 'on the first dimension'), 1,
  788. Doc('dilate_h', 'dilation (i.e. size of each zero-padded kernel block) '
  789. 'on the second dimension'), 1,
  790. Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) '
  791. 'on the third dimension'), 1
  792. ).
  793. add_enum('Sparse',
  794. Doc('DENSE = 0', 'dense convolution: filter shape should be '
  795. '[oc, ic, spatial...] if format is NCDHW, '
  796. '[oc, spatial..., ic] if format is NDHWC'),
  797. Doc('GROUP = 1', 'group convolution: filter shape should be '
  798. '[group, oc_per_group, ic_per_group, spatial...] if format is NCDHW, '
  799. '[group, oc_per_group, spatial..., ic_per_group] if format is NDHWC')
  800. ).
  801. add_enum('DataType',
  802. Doc('FLOAT = 0', 'input/output both float32/float16'),
  803. Doc('FLOAT_IO16xC32 = 1', 'input/output both float16, the internal '
  804. 'compute is float32'),
  805. name_field='data_type').
  806. add_enum('Format', 'NCDHW = 0', 'NDHWC = 1')
  807. )
  808. (pdef('Conv3DBias').
  809. add_enum('NonlineMode', 'IDENTITY = 0', 'RELU = 1', 'SIGMOID = 2').
  810. add_enum_alias('Mode', 'Convolution3D').
  811. add_fields('uint32', 'pad_d', 0, 'pad_h', 0, 'pad_w', 0,
  812. 'stride_d', 1, 'stride_h', 1, 'stride_w', 0))
  813. (pdef('SeparableConv3D').
  814. add_enum_alias('Mode', 'Convolution3D').
  815. add_enum('BorderMode', 'BORDER_REPLICATE = 0', 'BORDER_REFLECT = 1',
  816. 'BORDER_REFLECT_101 = 2','BORDER_WRAP = 3',
  817. 'BORDER_CONSTANT = 4', 'BORDER_TRANSPARENT = 5','BORDER_ISOLATED = 6').
  818. add_fields('bool', 'is_symm_kernel', 'true').
  819. add_fields('uint32', 'pad_d', 0, 'pad_h', 0, 'pad_w', 0,
  820. 'stride_d', 0, 'stride_h', 1, 'stride_w', 1,
  821. 'ksize_d', 0, 'ksize_h', 3, 'ksize_w', 3,
  822. 'anchor_d', 0, 'anchor_h', 1, 'anchor_w', 1))
  823. (pdef('TopK').
  824. add_enum(
  825. 'Mode',
  826. Doc('KTH_ONLY = 0', "only the value of the k'th element would be computed"),
  827. Doc('VALUE_IDX_NOSORT = 1',
  828. 'all the top-k values and corresponding indices would be computed; '
  829. 'no order is guaranteed'),
  830. Doc('VALUE_IDX_SORTED = 2',
  831. 'all the top-k values and corresponding indices sorted'))
  832. )
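# --- Editorial sketch (not part of the original file) -----------------------
# The three TopK modes above, written for the "k smallest" case with NumPy.
# Whether the real operator takes the smallest or the largest k is governed by
# its own convention (e.g. the sign of k), so this is only a semantics sketch.
import numpy as np

x = np.array([5.0, 1.0, 4.0, 2.0, 3.0])
k = 2
kth_only = np.partition(x, k - 1)[k - 1]        # KTH_ONLY: just the k-th value
idx_sorted = np.argsort(x)[:k]                  # VALUE_IDX_SORTED: values and indices, ordered
values_sorted = x[idx_sorted]
assert kth_only == 2.0
assert list(values_sorted) == [1.0, 2.0] and list(idx_sorted) == [1, 3]
# -----------------------------------------------------------------------------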
  833. RELAYOUT_FORMAT_MODE_DOC = """
  834. Relayout mode.
  835. **Naming conventions**
  836. 1. ``A_B`` means change from layout format ``A`` to ``B``.
  837. 2. ``INTER_WEIGHT_xx`` means relayout the weight for faster processing by
  838. :attr:`Convolution.Format.NHWCD4` convolutions.
  839. 3. A suffix of ``I`` means ``Image2DPack4TensorFormat`` tensor format is used
  840. for faster processing on GPUs.
  841. **Layout definitions**
  842. * ``NCHW`` layout: ``{N, C, H, W}``
  843. * ``NHWC`` layout: ``{N, H, W, C}``
  844. * ``NHWCD4`` layout: ``{N, H, (C + 3) / 4, W, 4}``
  845. * ``NHWCD4I`` layout: with ``align_axis = 2``
  846. * ``NCHW4`` layout: ``{N, C/4, H, W, 4}``
  847. * ``NCHW88`` layout: ``{N, C/8, H, W, 8}``
  848. * ``CHWN4`` layout: ``{C/4, H, W, N, 4}``
  849. * ``NCHW64`` layout: ``{N, C/64, H, W, 64}``
  850. **Float weight transformation definitions**
  851. +---------------+---------------------------------+--------------------+--------------------------------------+------+
  852. | Sparsity Type | Input Layout | Input Req | Output Layout | Axis |
  853. +===============+=================================+====================+======================================+======+
  854. | DENSE | ``{OC, IC, FH, FW}`` | ``OC % 4 == 0`` | ``{OC/4, FH, FW, IC, 4}`` | 3 |
  855. +---------------+---------------------------------+--------------------+--------------------------------------+------+
  856. | GROUP | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 4 == 0`` | ``{GROUP, OCPG/4, FH, FW, ICPG, 4}`` | 4 |
  857. | | | ``ICPG % 4 == 0`` | | |
  858. +---------------+---------------------------------+--------------------+--------------------------------------+------+
  859. | CHAN | ``{GROUP, 1, 1, FH, FW}`` | ``GROUP % 4 == 0`` | ``{GROUP / 4, 1, FH ,FW, 4}`` | 1 |
  860. +---------------+---------------------------------+--------------------+--------------------------------------+------+
  861. **Float weight transformation nchw88 definitions**
  862. +---------------+---------------------------------+--------------------+--------------------------------------+
  863. | Sparsity Type | Input Layout | Input Req | Output Layout |
  864. +===============+=================================+====================+======================================+
  865. | DENSE | ``{OC, IC, FH, FW}`` | ``OC % 8 == 0`` |``{OC/8, IC/8 ,FH, FW, 8(IC), 8(OC)}``|
  866. | | | ``IC % 8 == 0`` | |
  867. +---------------+---------------------------------+--------------------+--------------------------------------+
  868. | GROUP | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 8 == 0`` | ``{GROUP, OCPG/8, ICPG/8, FH, FW, |
  869. | | | ``ICPG % 8 == 0`` | 8(ICPG), 8(OCPG)}`` |
  870. +---------------+---------------------------------+--------------------+--------------------------------------+
  871. | CHAN | ``{GROUP, 1, 1, FH, FW}`` | ``GROUP % 8 == 0`` | ``{GROUP / 8, 1, FH ,FW, 8}`` |
  872. +---------------+---------------------------------+--------------------+--------------------------------------+
  873. **Int8(DOT) weight transformation definitions**
  874. +---------------+---------------------------------+--------------------+------------------------------------------+------+
  875. | Sparsity Type | Input Layout | Input Req | Output Layout | Axis |
  876. +===============+=================================+====================+==========================================+======+
  877. | DENSE | ``{OC, IC, FH, FW}`` | ``OC % 4 == 0`` | ``{OC/4, FH, FW, IC/4, 4, 4}`` | 3 |
  878. +---------------+---------------------------------+--------------------+------------------------------------------+------+
  879. | GROUP | ``{GROUP, OCPG, ICPG, FH, FW}`` | ``OCPG % 4 == 0`` | ``{GROUP, OCPG/4, FH, FW, ICPG/4, 4, 4}``| 4 |
  880. | | | ``ICPG % 4 == 0`` | | |
  881. +---------------+---------------------------------+--------------------+------------------------------------------+------+
  882. Note: the axis column means the corresponding ``align_axis`` for image format
  883. when the ``I`` suffix is present.
  884. Note: NCHW_NCHW4_WEIGHT will auto-pad oc and ic; the padded oc should be removed in a later opr by setting the group and oc params of NCHW4_NCHW
  885. """
  886. (pdef('RelayoutFormat', 'Change the tensor layout format', version=0, is_legacy=True).
  887. add_enum(
  888. Doc('Mode', RELAYOUT_FORMAT_MODE_DOC),
  889. 'NHWC_NHWCD4 = 0',
  890. 'NHWCD4_NHWC = 1',
  891. 'NHWC_NHWCD4I = 2',
  892. 'NCHW_NHWCD4 = 3',
  893. 'NCHW_NHWCD4I = 4',
  894. 'NHWCD4I_NCHW = 5',
  895. 'NHWCD4_NCHW = 6',
  896. 'INTER_WEIGHT_DENSE = 7',
  897. 'INTER_WEIGHT_DENSEI = 8',
  898. 'INTER_WEIGHT_GROUP = 9',
  899. 'INTER_WEIGHT_GROUPI = 10',
  900. 'INTER_WEIGHT_CHAN = 11',
  901. 'INTER_WEIGHT_CHANI = 12',
  902. 'INTER_WEIGHT_DENSEI_DOT = 13',
  903. 'INTER_WEIGHT_GROUPI_DOT = 14',
  904. 'NCHW4_CHWN4 = 15',
  905. 'CHWN4_NCHW4 = 16',
  906. 'NCHW_NCHW88_CONV_DENSE_WEIGHT = 17',
  907. 'NCHW_NCHW88_CONV_CHAN_WEIGHT = 18',
  908. 'NCHW_NCHW88_CONV_GROUP_WEIGHT = 19',
  909. 'NCHW_NCHW88 = 20',
  910. 'NCHW88_NCHW = 21',
  911. 'NCHW_NCHW4_IC_SMALL = 22',
  912. 'NCHW_NCHW4_IC_SMALL_CONV_DENSE_WEIGHT = 23',
  913. 'NCHW_NCHW4 = 24',
  914. 'NCHW4_NCHW = 25',
  915. 'NCHW_NCHW4_WEIGHT = 26',
  916. 'NCHW_NCHW64 = 27',
  917. 'NCHW64_NCHW = 28',
  918. 'NCHW_NHWC = 29',
  919. 'NHWC_NCHW = 30',
  920. )
  921. )
  922. (pdef('RelayoutFormat', 'Change the tensor layout format', version=1).
  923. add_enum_alias('Mode', 'RelayoutFormatV0').
  924. add_fields('uint32', 'oc', '0').
  925. add_fields('uint32', 'group', '1')
  926. )
  927. (pdef('SeparableFilter', version=0, is_legacy=True).
  928. add_enum_alias('Format', 'ConvolutionV0').
  929. add_enum_alias('BorderMode', 'WarpPerspectiveV1').
  930. add_fields('bool', 'is_symm_kernel', 'true').
  931. add_fields('uint32', 'ksize_h', 3, 'ksize_w', 3, 'anchor_h', 1, 'anchor_w', 1))
  932. (pdef('SeparableFilter', version=1).
  933. add_enum_alias('Format', 'Convolution').
  934. add_enum_alias('BorderMode', 'WarpPerspectiveV1').
  935. add_fields('bool', 'is_symm_kernel', 'true').
  936. add_fields('uint32', 'ksize_h', 3, 'ksize_w', 3, 'anchor_h', 1, 'anchor_w', 1))
  937. (pdef('LocalShare', 'Local share convolution',version=0, is_legacy=True).
  938. add_enum_alias('Mode', 'ConvolutionV0').
  939. add_fields(
  940. 'uint32',
  941. Doc('pad_h', 'padding on one side on the first dimension'), 0,
  942. Doc('pad_w', 'padding on one side on the second dimension'), 0,
  943. Doc('stride_h', 'kernel stride on the first dimension'), 1,
  944. Doc('stride_w', 'kernel stride on the second dimension'), 1,
  945. Doc('dilate_h', 'dilation (i.e. size of each zero-padded kernel block) '
  946. 'on the first dimension'), 1,
  947. Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) '
  948. 'on the second dimension'), 1,
  949. Doc('spatial_groups_h', 'spatial groups on the first dimension'), 1,
  950. Doc('spatial_groups_w', 'spatial groups on the second dimension'), 1
  951. ).
  952. add_enum_alias('Sparse', 'ConvolutionV0').
  953. add_enum_alias('Format', 'ConvolutionV0').
  954. add_enum_alias('ComputeMode', 'ConvolutionV1')
  955. )
  956. (pdef('LocalShare', 'Local share convolution', version=1).
  957. add_enum_alias('Mode', 'ConvolutionV0').
  958. add_fields(
  959. 'uint32',
  960. Doc('pad_h', 'padding on one side on the first dimension'), 0,
  961. Doc('pad_w', 'padding on one side on the second dimension'), 0,
  962. Doc('stride_h', 'kernel stride on the first dimension'), 1,
  963. Doc('stride_w', 'kernel stride on the second dimension'), 1,
  964. Doc('dilate_h', 'dilation (i.e. size of each zero-padded kernel block) '
  965. 'on the first dimension'), 1,
  966. Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) '
  967. 'on the second dimension'), 1,
  968. Doc('spatial_groups_h', 'spatial groups on the first dimension'), 1,
  969. Doc('spatial_groups_w', 'spatial groups on the second dimension'), 1
  970. ).
  971. add_enum_alias('Sparse', 'ConvolutionV0').
  972. add_enum_alias('Format', 'Convolution').
  973. add_enum_alias('ComputeMode', 'ConvolutionV1')
  974. )
  975. (pdef('ROIAlign',version=0,is_legacy=True).
  976. add_enum('Mode', 'MAX = 0', 'AVERAGE = 1', name_field='mode').
  977. add_enum_alias('Format', 'ConvolutionV0').
  978. add_fields('float32', 'spatial_scale', '1.0').
  979. add_fields('float32', 'offset', '0.0').
  980. add_fields('uint32',
  981. 'pooled_height', '1',
  982. 'pooled_width', '1',
  983. 'sample_height', '2',
  984. 'sample_width', '2')
  985. )
  986. (pdef('ROIAlign', version=1).
  987. add_enum_alias('Mode', 'ROIAlignV0', name_field='mode').
  988. add_enum_alias('Format', 'Convolution').
  989. add_fields('float32', 'spatial_scale', '1.0').
  990. add_fields('float32', 'offset', '0.0').
  991. add_fields('uint32',
  992. 'pooled_height', '1',
  993. 'pooled_width', '1',
  994. 'sample_height', '2',
  995. 'sample_width', '2')
  996. )
  997. (pdef('Correlation').
  998. add_enum_alias('Format', 'ConvolutionV0').
  999. add_fields('uint32', 'kernel_size', '1').
  1000. add_fields('uint32', 'max_displacement', '1').
  1001. add_fields('uint32', 'stride1', '1').
  1002. add_fields('uint32', 'stride2', '1').
  1003. add_fields('uint32', 'pad_size', '0').
  1004. add_fields('bool', 'is_multiply', 'true')
  1005. )
  1006. (pdef('DeformablePSROIPooling').
  1007. add_fields('bool', 'no_trans', 'true').
  1008. add_fields('float32', 'spatial_scale', 1,
  1009. 'trans_std', 1).
  1010. add_fields('uint32',
  1011. Doc('pooled_h', 'height of pooling output'), 1,
  1012. Doc('pooled_w', 'width of pooling output'), 1,
  1013. Doc('part_size', 'size of each deformable part'), 1,
  1014. Doc('sample_per_part', 'sample count of each bbox'), 1))
  1015. (pdef('BatchConvBias', 'Batch convolution (unshare weights on the batch dimension)',version=0,is_legacy=True).
  1016. add_enum_alias('NonlineMode', 'ConvBiasV0').
  1017. add_enum_alias('Mode', 'ConvolutionV0').
  1018. add_fields(
  1019. 'uint32',
  1020. Doc('pad_h', 'padding on one side on the first dimension'), 0,
  1021. Doc('pad_w', 'padding on one side on the second dimension'), 0,
  1022. Doc('stride_h', 'kernel stride on the first dimension'), 1,
  1023. Doc('stride_w', 'kernel stride on the second dimension'), 1,
  1024. Doc('dilate_h', 'dilation (i.e. size of each zero-padded kernel block) '
  1025. 'on the first dimension'), 1,
  1026. Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) '
  1027. 'on the second dimension'), 1,
  1028. ).
  1029. add_enum_alias('Sparse', 'ConvolutionV0').
  1030. add_enum_alias('Format', 'ConvolutionV0').
  1031. add_enum_alias('ComputeMode', 'ConvolutionV1', name_field="compute_mode")
  1032. )
  1033. (pdef('BatchConvBias', 'Batch convolution (unshare weights on the batch dimension)',version=1).
  1034. add_enum_alias('NonlineMode', 'ConvBiasV0').
  1035. add_enum_alias('Mode', 'ConvolutionV0').
  1036. add_fields(
  1037. 'uint32',
  1038. Doc('pad_h', 'padding on one side on the first dimension'), 0,
  1039. Doc('pad_w', 'padding on one side on the second dimension'), 0,
  1040. Doc('stride_h', 'kernel stride on the first dimension'), 1,
  1041. Doc('stride_w', 'kernel stride on the second dimension'), 1,
  1042. Doc('dilate_h', 'dilation (i.e. size of each zero-padded kernel block) '
  1043. 'on the first dimension'), 1,
  1044. Doc('dilate_w', 'dilation (i.e. size of each zero-padded kernel block) '
  1045. 'on the second dimension'), 1,
  1046. ).
  1047. add_enum_alias('Sparse', 'ConvolutionV0').
  1048. add_enum_alias('Format', 'Convolution').
  1049. add_enum_alias('ComputeMode', 'ConvolutionV1', name_field="compute_mode")
  1050. )
  1051. (pdef('FakeQuant').
  1052. add_fields('int32','qmin','-2147483648').
  1053. add_fields('int32','qmax','2147483647')
  1054. )
  1055. (pdef('TQT').
  1056. add_fields('int32', 'qmin', '-2147483648').
  1057. add_fields('int32', 'qmax', '2147483647')
  1058. )
  1059. (pdef('LSQ').
  1060. add_fields('int32', 'qmin', '-2147483648').
  1061. add_fields('int32', 'qmax', '2147483647')
  1062. )
  1063. pdef('Fill').add_fields('float32', 'value', '0')
  1064. PADDING_MODES = [Doc('REPLICATE = 0', 'aaaaaa|abcdefgh|hhhhhhh'),
  1065. Doc('REFLECT = 1', 'fedcba|abcdefgh|hgfedcb'),
  1066. Doc('CONSTANT = 2', 'iiiiii|abcdefgh|iiiiiii')]
  1067. (pdef('Padding').
  1068. add_fields('uint32', Doc('front_offset_dim0','offset in dim 0'), 0).
  1069. add_fields('uint32', Doc('front_offset_dim1','offset in dim 1'), 0).
  1070. add_fields('uint32', Doc('front_offset_dim2','offset in dim 2'), 0).
  1071. add_fields('uint32', Doc('front_offset_dim3','offset in dim 3'), 0).
  1072. add_fields('uint32', Doc('front_offset_dim4','offset in dim 4'), 0).
  1073. add_fields('uint32', Doc('front_offset_dim5','offset in dim 5'), 0).
  1074. add_fields('uint32', Doc('front_offset_dim6','offset in dim 6'), 0).
  1075. add_fields('uint32', Doc('back_offset_dim0', 'back offset in dim0'), 0).
  1076. add_fields('uint32', Doc('back_offset_dim1', 'back offset in dim1'), 0).
  1077. add_fields('uint32', Doc('back_offset_dim2', 'back offset in dim2'), 0).
  1078. add_fields('uint32', Doc('back_offset_dim3', 'back offset in dim3'), 0).
  1079. add_fields('uint32', Doc('back_offset_dim4', 'back offset in dim4'), 0).
  1080. add_fields('uint32', Doc('back_offset_dim5', 'back offset in dim5'), 0).
  1081. add_fields('uint32', Doc('back_offset_dim6', 'back offset in dim6'), 0).
  1082. add_fields('float32', Doc('padding_val','param of padding opr'), 0).
  1083. add_enum('PaddingMode', *PADDING_MODES,
  1084. name_field='padding_mode', default=2,
  1085. member_alias=[(i, 'PADDING_{}'.format(i)) for i in PADDING_MODES]
  1086. )
  1087. )
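# --- Editorial sketch (not part of the original file) -----------------------
# How the per-dimension front/back offsets and padding_val above map onto
# numpy.pad for a rank-2 tensor in CONSTANT mode; unused trailing dims would
# simply keep offset 0. The field-to-argument mapping is an editorial assumption.
import numpy as np

x = np.ones((2, 3), dtype=np.float32)
front = (1, 0)       # front_offset_dim0, front_offset_dim1
back = (0, 2)        # back_offset_dim0,  back_offset_dim1
padding_val = 7.0
y = np.pad(x, list(zip(front, back)), mode='constant', constant_values=padding_val)
assert y.shape == (3, 5)
assert y[0, 0] == 7.0 and y[1, 0] == 1.0
# -----------------------------------------------------------------------------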
