|
|
@@ -3,7 +3,7 @@ pdef('Empty') |
|
|
|
pdef('Axis').add_fields('int32', 'axis', 0) |
|
|
|
|
|
|
|
(pdef('Convolution', version=0, is_legacy=True). |
|
|
|
add_enum('Mode', 'CROSS_CORRELATION', 'CONVOLUTION'). |
|
|
|
add_enum('Mode', 'CROSS_CORRELATION = 0', 'CONVOLUTION = 1'). |
|
|
|
add_fields( |
|
|
|
'uint32', |
|
|
|
Doc('pad_h', 'padding on one side on the first dimension'), 0, |
|
|
@@ -16,41 +16,41 @@ pdef('Axis').add_fields('int32', 'axis', 0) |
|
|
|
'on the second dimension'), 1 |
|
|
|
). |
|
|
|
add_enum('DataType', |
|
|
|
Doc('FLOAT', 'input/output both float32/float16'), |
|
|
|
'INT8x8x16', |
|
|
|
'INT8x8x32', |
|
|
|
Doc('FLOAT_IO16xC32', 'input/output both float16, the internal ' |
|
|
|
Doc('FLOAT = 0', 'input/output both float32/float16'), |
|
|
|
'INT8x8x16 = 1', |
|
|
|
'INT8x8x32 = 2', |
|
|
|
Doc('FLOAT_IO16xC32 = 3', 'input/output both float16, the internal ' |
|
|
|
'compute is float32'), |
|
|
|
Doc('QUINT8x8x32', 'input QuantizedAsymm8, output QuantizedS32'), |
|
|
|
Doc('INT8x8xX', 'input int8, output specified by tensor DType'), |
|
|
|
Doc('QUINT4x4x32', 'input QuantizedAsymm4, output QuantizedS32'), |
|
|
|
Doc('QUINT8x8x32 = 4', 'input QuantizedAsymm8, output QuantizedS32'), |
|
|
|
Doc('INT8x8xX = 5', 'input int8, output specified by tensor DType'), |
|
|
|
Doc('QUINT4x4x32 = 6', 'input QuantizedAsymm4, output QuantizedS32'), |
|
|
|
name_field='data_type'). |
|
|
|
add_enum('Sparse', |
|
|
|
Doc('DENSE', 'dense convolution: filter shape should be ' |
|
|
|
Doc('DENSE = 0', 'dense convolution: filter shape should be ' |
|
|
|
'[oc, ic, spatial...] if format is NCHW, ' |
|
|
|
'[oc, spatial..., ic] if format is NHWC'), |
|
|
|
Doc('GROUP', 'group convolution: filter shape should be ' |
|
|
|
Doc('GROUP = 1', 'group convolution: filter shape should be ' |
|
|
|
'[group, oc_per_group, ic_per_group, spatial...] if format is NCHW, ' |
|
|
|
'[group, oc_per_group, spatial..., ic_per_group] if format is NHWC') |
|
|
|
). |
|
|
|
add_enum(Doc('Format', 'convolution data/filter/output format; see ' |
|
|
|
':class:`RelayoutFormat` for more details'), |
|
|
|
'NCHW', 'NHWC', 'NHWCD4', 'NCHW4', 'NCHW8', 'NCHW32', 'NCHW88', |
|
|
|
'NCHW44','NCHW44_DOT', |
|
|
|
Doc('NCHW_WINOGRAD', 'NCHW layout with weights tranformed by winograd'), |
|
|
|
Doc('NCHW88_WINOGRAD', 'NCHW88 layout with weights tranformed by winograd'), |
|
|
|
Doc('NCHW44_WINOGRAD', 'NCHW44 layout with weights tranformed by winograd'), |
|
|
|
Doc('NCHW4_NCHW32', 'NCHW4_NCHW32 means input tensors are nchw4 layout, output tensor is nchw32 layout'), |
|
|
|
Doc('NCHW32_NCHW4', 'NCHW32_NCHW4 means input tensors are nchw32 layout, output tensor is nchw4 layout'), |
|
|
|
Doc('NCHW4_NCHW', 'NCHW4_NCHW means input tensors are nchw4 layout, output tensor is nchw layout'), |
|
|
|
Doc('NCHW4_NHWC', 'NCHW4_NHWC means input tensors are nchw4 layout, output tensor is nhwc layout'), |
|
|
|
Doc('NHWC_NCHW', 'NHWC_NCHW means input tensors are nhwc layout, ' |
|
|
|
'NCHW = 0', 'NHWC = 1', 'NHWCD4 = 2', 'NCHW4 = 3', 'NCHW8 = 4', 'NCHW32 = 5', 'NCHW88 = 6', |
|
|
|
'NCHW44 = 7','NCHW44_DOT = 8', |
|
|
|
Doc('NCHW_WINOGRAD = 9', 'NCHW layout with weights tranformed by winograd'), |
|
|
|
Doc('NCHW88_WINOGRAD = 10', 'NCHW88 layout with weights tranformed by winograd'), |
|
|
|
Doc('NCHW44_WINOGRAD = 11', 'NCHW44 layout with weights tranformed by winograd'), |
|
|
|
Doc('NCHW4_NCHW32 = 12', 'NCHW4_NCHW32 means input tensors are nchw4 layout, output tensor is nchw32 layout'), |
|
|
|
Doc('NCHW32_NCHW4 = 13', 'NCHW32_NCHW4 means input tensors are nchw32 layout, output tensor is nchw4 layout'), |
|
|
|
Doc('NCHW4_NCHW = 14', 'NCHW4_NCHW means input tensors are nchw4 layout, output tensor is nchw layout'), |
|
|
|
Doc('NCHW4_NHWC = 15', 'NCHW4_NHWC means input tensors are nchw4 layout, output tensor is nhwc layout'), |
|
|
|
Doc('NHWC_NCHW = 16', 'NHWC_NCHW means input tensors are nhwc layout, ' |
|
|
|
'output tensor is nchw layout'), |
|
|
|
Doc('NHWC_NCHW4_IC_SMALL', 'NHWC_NCHW4_IC_SMALL means input tensors are nhwc(c < 4) layout, ' |
|
|
|
Doc('NHWC_NCHW4_IC_SMALL = 17', 'NHWC_NCHW4_IC_SMALL means input tensors are nhwc(c < 4) layout, ' |
|
|
|
'output tensor is nchw4 layout, padding c=4'), |
|
|
|
Doc('NCHW_NCHW4_IC_SMALL', 'NCHW_NCHW4_IC_SMALL means input tensors are nchw(c < 4) layout, ' |
|
|
|
Doc('NCHW_NCHW4_IC_SMALL = 18', 'NCHW_NCHW4_IC_SMALL means input tensors are nchw(c < 4) layout, ' |
|
|
|
'output tensor is nchw4 layout, padding c=4'), |
|
|
|
Doc('CHWN4', 'CHWN4 is currently only used on Nvidia platform for fast implementation ' |
|
|
|
Doc('CHWN4 = 19', 'CHWN4 is currently only used on Nvidia platform for fast implementation ' |
|
|
|
'of convolution using CUDA/SASS. The channels are splitted to groups of 4 channels.')) |
|
|
|
) |
|
|
|
|
|
|
@@ -72,9 +72,9 @@ pdef('Axis').add_fields('int32', 'axis', 0) |
|
|
|
add_enum(Doc('ComputeMode', 'Specifies special computation modes, e.g. ' |
|
|
|
'different combinations of intermediate result ' |
|
|
|
'data types.'), |
|
|
|
Doc('DEFAULT', 'No special requirements on the precision of ' |
|
|
|
Doc('DEFAULT = 0', 'No special requirements on the precision of ' |
|
|
|
'intermediate results.'), |
|
|
|
Doc('FLOAT32', 'Use Float32 accumulator and intermediate result. ' |
|
|
|
Doc('FLOAT32 = 1', 'Use Float32 accumulator and intermediate result. ' |
|
|
|
'Only supported when input and output is Float16.'), |
|
|
|
name_field='compute_mode') |
|
|
|
) |
|
|
@@ -95,21 +95,21 @@ pdef('Axis').add_fields('int32', 'axis', 0) |
|
|
|
add_enum_alias('Sparse', 'ConvolutionV0'). |
|
|
|
add_enum(Doc('Format', 'convolution data/filter/output format; see ' |
|
|
|
':class:`RelayoutFormat` for more details'), |
|
|
|
'NCHW', 'NHWC', 'NHWCD4', 'NCHW4', 'NCHW8', 'NCHW32', 'NCHW88', |
|
|
|
'NCHW44','NCHW44_DOT', |
|
|
|
Doc('NCHW4_NCHW32', 'NCHW4_NCHW32 means input tensors are nchw4 layout, output tensor is nchw32 layout'), |
|
|
|
Doc('NCHW32_NCHW4', 'NCHW32_NCHW4 means input tensors are nchw32 layout, output tensor is nchw4 layout'), |
|
|
|
Doc('NCHW4_NCHW', 'NCHW4_NCHW means input tensors are nchw4 layout, output tensor is nchw layout'), |
|
|
|
Doc('NCHW4_NHWC', 'NCHW4_NHWC means input tensors are nchw4 layout, output tensor is nhwc layout'), |
|
|
|
Doc('NHWC_NCHW', 'NHWC_NCHW means input tensors are nhwc layout, ' |
|
|
|
'NCHW = 0', 'NHWC = 1', 'NHWCD4 = 2', 'NCHW4 = 3', 'NCHW8 = 4', 'NCHW32 = 5', 'NCHW88 = 6', |
|
|
|
'NCHW44 = 7','NCHW44_DOT = 8', |
|
|
|
Doc('NCHW4_NCHW32 = 9', 'NCHW4_NCHW32 means input tensors are nchw4 layout, output tensor is nchw32 layout'), |
|
|
|
Doc('NCHW32_NCHW4 = 10', 'NCHW32_NCHW4 means input tensors are nchw32 layout, output tensor is nchw4 layout'), |
|
|
|
Doc('NCHW4_NCHW = 11', 'NCHW4_NCHW means input tensors are nchw4 layout, output tensor is nchw layout'), |
|
|
|
Doc('NCHW4_NHWC = 12', 'NCHW4_NHWC means input tensors are nchw4 layout, output tensor is nhwc layout'), |
|
|
|
Doc('NHWC_NCHW = 13', 'NHWC_NCHW means input tensors are nhwc layout, ' |
|
|
|
'output tensor is nchw layout'), |
|
|
|
Doc('NHWC_NCHW4_IC_SMALL', 'NHWC_NCHW4_IC_SMALL means input tensors are nhwc(c < 4) layout, ' |
|
|
|
Doc('NHWC_NCHW4_IC_SMALL = 14', 'NHWC_NCHW4_IC_SMALL means input tensors are nhwc(c < 4) layout, ' |
|
|
|
'output tensor is nchw4 layout, padding c=4'), |
|
|
|
Doc('NCHW_NCHW4_IC_SMALL', 'NCHW_NCHW4_IC_SMALL means input tensors are nchw(c < 4) layout, ' |
|
|
|
Doc('NCHW_NCHW4_IC_SMALL = 15', 'NCHW_NCHW4_IC_SMALL means input tensors are nchw(c < 4) layout, ' |
|
|
|
'output tensor is nchw4 layout, padding c=4'), |
|
|
|
Doc('CHWN4', 'CHWN4 is currently only used on Nvidia platform for fast implementation ' |
|
|
|
Doc('CHWN4 = 16', 'CHWN4 is currently only used on Nvidia platform for fast implementation ' |
|
|
|
'of convolution using CUDA/SASS. The channels are splitted to groups of 4 channels.'), |
|
|
|
Doc('NCHW64', 'NCHW64 is designed for convolution implementation to utilizing TensorCore ' |
|
|
|
Doc('NCHW64 = 17', 'NCHW64 is designed for convolution implementation to utilizing TensorCore ' |
|
|
|
'instructions for 4-bit integers on Nvidia platforms')). |
|
|
|
add_enum_alias('ComputeMode', 'ConvolutionV1',name_field='compute_mode') |
|
|
|
) |
|
|
@@ -129,15 +129,15 @@ pdef('Axis').add_fields('int32', 'axis', 0) |
|
|
|
) |
|
|
|
|
|
|
|
(pdef('ConvPooling'). |
|
|
|
add_enum('Method', 'WITH_TEXTURE_OBJ', 'WITH_SHARED_MEM'). |
|
|
|
add_enum('Method', 'WITH_TEXTURE_OBJ = 0', 'WITH_SHARED_MEM = 1'). |
|
|
|
add_enum_alias('ConvMode', 'ConvolutionV0', 'Mode'). |
|
|
|
add_enum('PoolMode', 'AVERAGE', 'MAX'). |
|
|
|
add_enum('NonlineMode', 'IDENTITY', 'RELU', 'SIGMOID'). |
|
|
|
add_enum('PoolMode', 'AVERAGE = 0', 'MAX = 1'). |
|
|
|
add_enum('NonlineMode', 'IDENTITY = 0', 'RELU = 1', 'SIGMOID = 2'). |
|
|
|
add_fields('uint32', 'pool_shape_h', 1, 'pool_shape_w', 1, 'pool_stride_h', 1, 'pool_stride_w', 1, \ |
|
|
|
'pool_pad_h', 0, 'pool_pad_w', 0, 'conv_stride_h', 1, 'conv_stride_w', 1, 'conv_pad_h', 0, 'conv_pad_w', 0)) |
|
|
|
|
|
|
|
(pdef('ConvBias', 'legacy conv_bias', version=0, is_legacy=True). |
|
|
|
add_enum('NonlineMode', 'IDENTITY', 'RELU', 'SIGMOID', 'H_SWISH'). |
|
|
|
add_enum('NonlineMode', 'IDENTITY = 0', 'RELU = 1', 'SIGMOID = 2', 'H_SWISH = 3'). |
|
|
|
add_enum_alias('Mode', 'ConvolutionV0'). |
|
|
|
add_fields('uint32', 'pad_h', 0, 'pad_w', 0, 'stride_h', 1, 'stride_w', 1)) |
|
|
|
|
|
|
@@ -215,9 +215,9 @@ pdef('Axis').add_fields('int32', 'axis', 0) |
|
|
|
) |
|
|
|
(pdef('SeparableConv'). |
|
|
|
add_enum_alias('Mode', 'ConvolutionV0'). |
|
|
|
add_enum('BorderMode', 'BORDER_REPLICATE', 'BORDER_REFLECT', |
|
|
|
'BORDER_REFLECT_101','BORDER_WRAP', |
|
|
|
'BORDER_CONSTANT', 'BORDER_TRANSPARENT','BORDER_ISOLATED'). |
|
|
|
add_enum('BorderMode', 'BORDER_REPLICATE = 0', 'BORDER_REFLECT = 1', |
|
|
|
'BORDER_REFLECT_101 = 2','BORDER_WRAP = 3', |
|
|
|
'BORDER_CONSTANT = 4', 'BORDER_TRANSPARENT = 5','BORDER_ISOLATED = 6'). |
|
|
|
add_fields('bool', 'is_symm_kernel', 'true'). |
|
|
|
add_fields('uint32', 'pad_h', 0, 'pad_w', 0, 'stride_h', 1, 'stride_w', 1, |
|
|
|
'ksize_h', 3, 'ksize_w', 3, 'anchor_h', 1, 'anchor_w', 1)) |
|
|
@@ -233,11 +233,11 @@ pdef('Axis').add_fields('int32', 'axis', 0) |
|
|
|
(pdef('Pooling', version=0, is_legacy=True). |
|
|
|
add_enum( |
|
|
|
'Mode', |
|
|
|
Doc('MAX', 'maximum value inside pooling window'), |
|
|
|
Doc('AVERAGE', |
|
|
|
Doc('MAX = 0', 'maximum value inside pooling window'), |
|
|
|
Doc('AVERAGE = 1', |
|
|
|
'arithmetic mean of all values inside pooling window. Padding values ' |
|
|
|
'are taken into account and are viewed as zero'), |
|
|
|
Doc('AVERAGE_COUNT_EXCLUDE_PADDING', |
|
|
|
Doc('AVERAGE_COUNT_EXCLUDE_PADDING = 2', |
|
|
|
'arithmetic mean of all values inside pooling window. No padding is' |
|
|
|
'used.') |
|
|
|
). |
|
|
@@ -273,15 +273,15 @@ pdef('Axis').add_fields('int32', 'axis', 0) |
|
|
|
(pdef('BN'). |
|
|
|
add_enum( |
|
|
|
'ParamDim', |
|
|
|
Doc('DIM_11HW', 'Dim of params (Sigma, Mu) is 1 x 1 x H x W'), |
|
|
|
Doc('DIM_1CHW', 'Dim of params (Sigma, Mu) is 1 x C x H x W'), |
|
|
|
Doc('DIM_1C11', 'Dim of params (Sigma, Mu) is 1 x C x 1 x 1'), |
|
|
|
Doc('DIM_11HW = 0', 'Dim of params (Sigma, Mu) is 1 x 1 x H x W'), |
|
|
|
Doc('DIM_1CHW = 1', 'Dim of params (Sigma, Mu) is 1 x C x H x W'), |
|
|
|
Doc('DIM_1C11 = 2', 'Dim of params (Sigma, Mu) is 1 x C x 1 x 1'), |
|
|
|
name_field='param_dim' |
|
|
|
). |
|
|
|
add_enum( |
|
|
|
'FwdMode', |
|
|
|
Doc('TRAINING', 'Training phase.'), |
|
|
|
Doc('INFERENCE', 'Inference phase.'), |
|
|
|
Doc('TRAINING = 0', 'Training phase.'), |
|
|
|
Doc('INFERENCE = 1', 'Inference phase.'), |
|
|
|
name_field='fwd_mode' |
|
|
|
). |
|
|
|
add_fields('float64', 'epsilon', '1e-4f'). |
|
|
@@ -293,22 +293,22 @@ pdef('Axis').add_fields('int32', 'axis', 0) |
|
|
|
(pdef('ROIPooling'). |
|
|
|
add_enum( |
|
|
|
'Mode', |
|
|
|
Doc('MAX', 'maximum value inside pooling window; pooling result would ' |
|
|
|
Doc('MAX = 0', 'maximum value inside pooling window; pooling result would ' |
|
|
|
'be 0 if pooling window is empty'), |
|
|
|
Doc('AVERAGE', |
|
|
|
Doc('AVERAGE = 1', |
|
|
|
'arithmetic mean of all values inside pooling window; pooling result ' |
|
|
|
'would be 0 if pooling window is empty') |
|
|
|
). |
|
|
|
add_fields('float32', 'scale', '1.f')) |
|
|
|
|
|
|
|
INTERP_MODES = ['NEAREST', 'LINEAR', 'AREA', 'CUBIC', 'LANCZOS4'] |
|
|
|
BORDER_MODES = [Doc('REPLICATE', 'aaaaaa|abcdefgh|hhhhhhh'), |
|
|
|
Doc('REFLECT', 'fedcba|abcdefgh|hgfedcb'), |
|
|
|
Doc('REFLECT_101', 'gfedcb|abcdefgh|gfedcba'), |
|
|
|
Doc('WRAP', 'cdefgh|abcdefgh|abcdefg'), |
|
|
|
Doc('CONSTANT', 'iiiiii|abcdefgh|iiiiiii'), |
|
|
|
Doc('TRANSPARENT', ''), |
|
|
|
Doc('ISOLATED', '')] |
|
|
|
INTERP_MODES = ['NEAREST = 0', 'LINEAR = 1', 'AREA = 2', 'CUBIC = 3', 'LANCZOS4 = 4'] |
|
|
|
BORDER_MODES = [Doc('REPLICATE = 0', 'aaaaaa|abcdefgh|hhhhhhh'), |
|
|
|
Doc('REFLECT = 1', 'fedcba|abcdefgh|hgfedcb'), |
|
|
|
Doc('REFLECT_101 = 2', 'gfedcb|abcdefgh|gfedcba'), |
|
|
|
Doc('WRAP = 3', 'cdefgh|abcdefgh|abcdefg'), |
|
|
|
Doc('CONSTANT = 4', 'iiiiii|abcdefgh|iiiiiii'), |
|
|
|
Doc('TRANSPARENT = 5', ''), |
|
|
|
Doc('ISOLATED = 6', '')] |
|
|
|
(pdef('WarpPerspective', version=1, is_legacy=True). |
|
|
|
add_enum('InterpolationMode', *INTERP_MODES, |
|
|
|
name_field='imode', default=1, |
|
|
@@ -328,181 +328,181 @@ BORDER_MODES = [Doc('REPLICATE', 'aaaaaa|abcdefgh|hhhhhhh'), |
|
|
|
add_fields('float32', Doc('border_val', 'used for CONSTANT bmode'), '.0f')) |
|
|
|
|
|
|
|
|
|
|
|
pdef('SpatialTfGridGenerator').add_enum('Mode', 'AFFINE') |
|
|
|
pdef('SpatialTfSampler').add_enum('Mode', 'BILINEAR') |
|
|
|
pdef('SpatialTfGridGenerator').add_enum('Mode', 'AFFINE = 0') |
|
|
|
pdef('SpatialTfSampler').add_enum('Mode', 'BILINEAR = 0') |
|
|
|
|
|
|
|
pdef('AddUpdate').add_fields( |
|
|
|
'float32', 'alpha', '1.f', 'beta', '1.f', 'bias', '0.f') |
|
|
|
|
|
|
|
pdef('Elemwise').add_enum( |
|
|
|
'Mode', |
|
|
|
Doc('RELU', 'unary: max(x, 0)'), |
|
|
|
Doc('ABS', 'unary: abs(x)'), |
|
|
|
Doc('ACOS', 'unary: acos(x)'), |
|
|
|
Doc('ASIN', 'unary: asin(x)'), |
|
|
|
Doc('CEIL', 'unary: ceil(x)'), |
|
|
|
Doc('COS', 'unary: cos(x)'), |
|
|
|
Doc('EXP', 'unary: exp(x)'), |
|
|
|
Doc('EXPM1', 'unary: numerically stable exp(x)-1'), |
|
|
|
Doc('FLOOR', 'unary: floor(x)'), |
|
|
|
Doc('LOG', 'unary: natural logarithm, log(x)'), |
|
|
|
Doc('LOG1P', 'unary: numerically stable log(x+1)'), |
|
|
|
Doc('NEGATE', 'unary: -x'), |
|
|
|
Doc('SIGMOID', 'unary: 1/(1+exp(-x))'), |
|
|
|
Doc('SIN', 'unary: sin(x)'), |
|
|
|
Doc('TANH', 'unary: tanh(x)'), |
|
|
|
|
|
|
|
Doc('ABS_GRAD', 'binary: x > 0 ? y : -y'), |
|
|
|
Doc('ADD', 'binary: x + y'), |
|
|
|
Doc('FLOOR_DIV', 'binary: floor(x / y)'), |
|
|
|
Doc('MAX', 'binary: max(x, y)'), |
|
|
|
Doc('MIN', 'binary: min(x, y)'), |
|
|
|
Doc('MOD', 'binary: x % y or fmodf(x, y)'), |
|
|
|
Doc('MUL', 'binary: x * y'), |
|
|
|
Doc('POW', 'binary: pow(x, y)'), |
|
|
|
Doc('SIGMOID_GRAD', 'binary: x * (1 - x) * y'), |
|
|
|
Doc('SUB', 'binary: x - y'), |
|
|
|
Doc('SWITCH_GT0', 'binary: (x > 0) * y'), |
|
|
|
Doc('TANH_GRAD', 'binary: (1 - x * x) * y'), |
|
|
|
Doc('TRUE_DIV', 'binary: x / y'), |
|
|
|
Doc('LOG_SUM_EXP', 'binary: numerically stable log(exp(x) + exp(y))'), |
|
|
|
|
|
|
|
Doc('LT', 'binary: x < y'), |
|
|
|
Doc('LEQ', 'binary: x <= y'), |
|
|
|
Doc('EQ', 'binary: x == y'), |
|
|
|
|
|
|
|
Doc('SHL', 'bitwise binary: x << y. ' |
|
|
|
Doc('RELU = 0', 'unary: max(x, 0)'), |
|
|
|
Doc('ABS = 1', 'unary: abs(x)'), |
|
|
|
Doc('ACOS = 2', 'unary: acos(x)'), |
|
|
|
Doc('ASIN = 3', 'unary: asin(x)'), |
|
|
|
Doc('CEIL = 4', 'unary: ceil(x)'), |
|
|
|
Doc('COS = 5', 'unary: cos(x)'), |
|
|
|
Doc('EXP = 6', 'unary: exp(x)'), |
|
|
|
Doc('EXPM1 = 7', 'unary: numerically stable exp(x)-1'), |
|
|
|
Doc('FLOOR = 8', 'unary: floor(x)'), |
|
|
|
Doc('LOG = 9', 'unary: natural logarithm, log(x)'), |
|
|
|
Doc('LOG1P = 10', 'unary: numerically stable log(x+1)'), |
|
|
|
Doc('NEGATE = 11', 'unary: -x'), |
|
|
|
Doc('SIGMOID = 12', 'unary: 1/(1+exp(-x))'), |
|
|
|
Doc('SIN = 13', 'unary: sin(x)'), |
|
|
|
Doc('TANH = 14', 'unary: tanh(x)'), |
|
|
|
|
|
|
|
Doc('ABS_GRAD = 15', 'binary: x > 0 ? y : -y'), |
|
|
|
Doc('ADD = 16', 'binary: x + y'), |
|
|
|
Doc('FLOOR_DIV = 17', 'binary: floor(x / y)'), |
|
|
|
Doc('MAX = 18', 'binary: max(x, y)'), |
|
|
|
Doc('MIN = 19', 'binary: min(x, y)'), |
|
|
|
Doc('MOD = 20', 'binary: x % y or fmodf(x, y)'), |
|
|
|
Doc('MUL = 21', 'binary: x * y'), |
|
|
|
Doc('POW = 22', 'binary: pow(x, y)'), |
|
|
|
Doc('SIGMOID_GRAD = 23', 'binary: x * (1 - x) * y'), |
|
|
|
Doc('SUB = 24', 'binary: x - y'), |
|
|
|
Doc('SWITCH_GT0 = 25', 'binary: (x > 0) * y'), |
|
|
|
Doc('TANH_GRAD = 26', 'binary: (1 - x * x) * y'), |
|
|
|
Doc('TRUE_DIV = 27', 'binary: x / y'), |
|
|
|
Doc('LOG_SUM_EXP = 28', 'binary: numerically stable log(exp(x) + exp(y))'), |
|
|
|
|
|
|
|
Doc('LT = 29', 'binary: x < y'), |
|
|
|
Doc('LEQ = 30', 'binary: x <= y'), |
|
|
|
Doc('EQ = 31', 'binary: x == y'), |
|
|
|
|
|
|
|
Doc('SHL = 32', 'bitwise binary: x << y. ' |
|
|
|
'Note that result is undefined if y < 0 or y >= bitwidth. Logical ' |
|
|
|
'shift is performed for unsigned intergers, and arithmetic shift for ' |
|
|
|
'signed ones.'), |
|
|
|
Doc('SHR', 'bitwise binary: x >> y; see SHL mode for more details'), |
|
|
|
Doc('SHR = 33', 'bitwise binary: x >> y; see SHL mode for more details'), |
|
|
|
|
|
|
|
Doc('COND_LEQ_MOV', 'ternary: x <= y ? z : 0'), |
|
|
|
Doc('FUSE_MUL_ADD3', |
|
|
|
Doc('COND_LEQ_MOV = 34', 'ternary: x <= y ? z : 0'), |
|
|
|
Doc('FUSE_MUL_ADD3 = 35', |
|
|
|
'compute ``a * b + c`` where c must either have same layout as ' |
|
|
|
'a or b, or be a scalar'), |
|
|
|
|
|
|
|
Doc('FUSE_MUL_ADD4', |
|
|
|
Doc('FUSE_MUL_ADD4 = 36', |
|
|
|
'compute ``a * A + b * B`` where a and b must have equal layout, ' |
|
|
|
'and A and B must have equal layout. In the inputs ``b`` and ``B`` ' |
|
|
|
'can be swapped'), |
|
|
|
|
|
|
|
Doc('FUSE_ADD_RELU', 'binary: max(x+y, 0)'), |
|
|
|
Doc('FUSE_ADD_SIGMOID', 'binary: 1/(1+exp(-(x+y)))'), |
|
|
|
Doc('FUSE_ADD_TANH', 'binary: tanh(x+y)'), |
|
|
|
Doc('FAST_TANH', 'unary: rational approximation of tanh(x)'), |
|
|
|
Doc('FAST_TANH_GRAD', 'binary: grad of the rational approximation of tanh(x)'), |
|
|
|
Doc('FUSE_ADD_RELU = 37', 'binary: max(x+y, 0)'), |
|
|
|
Doc('FUSE_ADD_SIGMOID = 38', 'binary: 1/(1+exp(-(x+y)))'), |
|
|
|
Doc('FUSE_ADD_TANH = 39', 'binary: tanh(x+y)'), |
|
|
|
Doc('FAST_TANH = 40', 'unary: rational approximation of tanh(x)'), |
|
|
|
Doc('FAST_TANH_GRAD = 41', 'binary: grad of the rational approximation of tanh(x)'), |
|
|
|
|
|
|
|
Doc('ROUND', 'unary: round(x), the nearest integer value to x, rounding ' |
|
|
|
Doc('ROUND = 42', 'unary: round(x), the nearest integer value to x, rounding ' |
|
|
|
'halfway cases away from zero. Float only.'), |
|
|
|
Doc('RMULH', 'binary: rounded higher l bits of x * y, where l is the bit ' |
|
|
|
Doc('RMULH = 43', 'binary: rounded higher l bits of x * y, where l is the bit ' |
|
|
|
'length of x.'), |
|
|
|
|
|
|
|
Doc('ATAN2','binary: atan2(y,x)'), |
|
|
|
Doc('ERF', 'unary: erf(x)'), |
|
|
|
Doc('ERFINV', 'unary: inverse function of erf(x)'), |
|
|
|
Doc('ERFC', 'unary: erfc(x)'), |
|
|
|
Doc('ERFCINV', 'unary: inverse function of erfc(x)'), |
|
|
|
Doc('H_SWISH', 'unary: x * clip(x + 3, 0, 6) / 6'), |
|
|
|
Doc('H_SWISH_GRAD', 'binary: x < -3 ? 0 : (x > 3 ? y : (2 * x + 3) / 6 * y)'), |
|
|
|
Doc('FUSE_ADD_H_SWISH', 'binary: hswish(x+y)'), |
|
|
|
|
|
|
|
Doc('NOT', 'unary: !x'), |
|
|
|
Doc('AND', 'binary: x && y'), |
|
|
|
Doc('OR', 'binary: x || y'), |
|
|
|
Doc('XOR', 'binary: x ^ y'), |
|
|
|
Doc('SILU', 'unary: x / (1 + exp(-x))'), |
|
|
|
Doc('SILU_GRAD', 'binary: grad(x / (1 + exp(-x))'), |
|
|
|
Doc('GELU', 'unary: x Phi(x)'), |
|
|
|
Doc('GELU_GRAD', 'binary: grad(x Phi(x))'), |
|
|
|
Doc('ATAN2 = 44','binary: atan2(y,x)'), |
|
|
|
Doc('ERF = 45', 'unary: erf(x)'), |
|
|
|
Doc('ERFINV = 46', 'unary: inverse function of erf(x)'), |
|
|
|
Doc('ERFC = 47', 'unary: erfc(x)'), |
|
|
|
Doc('ERFCINV = 48', 'unary: inverse function of erfc(x)'), |
|
|
|
Doc('H_SWISH = 49', 'unary: x * clip(x + 3, 0, 6) / 6'), |
|
|
|
Doc('H_SWISH_GRAD = 50', 'binary: x < -3 ? 0 : (x > 3 ? y : (2 * x + 3) / 6 * y)'), |
|
|
|
Doc('FUSE_ADD_H_SWISH = 51', 'binary: hswish(x+y)'), |
|
|
|
|
|
|
|
Doc('NOT = 52', 'unary: !x'), |
|
|
|
Doc('AND = 53', 'binary: x && y'), |
|
|
|
Doc('OR = 54', 'binary: x || y'), |
|
|
|
Doc('XOR = 55', 'binary: x ^ y'), |
|
|
|
Doc('SILU = 56', 'unary: x / (1 + exp(-x))'), |
|
|
|
Doc('SILU_GRAD = 57', 'binary: grad(x / (1 + exp(-x))'), |
|
|
|
Doc('GELU = 58', 'unary: x Phi(x)'), |
|
|
|
Doc('GELU_GRAD = 59', 'binary: grad(x Phi(x))'), |
|
|
|
) |
|
|
|
|
|
|
|
pdef('ElemwiseMultiType').add_enum( |
|
|
|
'Mode', |
|
|
|
Doc('FUSE_MUL_ADD3_INT16x32x32x32', |
|
|
|
Doc('FUSE_MUL_ADD3_INT16x32x32x32 = 0', |
|
|
|
'compute ``a * b + c`` requiring that ``a`` be int16 and ``b`` and ' |
|
|
|
'``c`` int32, and the result is int32. This mode is optimized for ' |
|
|
|
'the channel-broadacsted case, i.e. ``a`` has shape (A, B, C) and ' |
|
|
|
'``b`` and ``c`` have shape (1, C, 1)'), |
|
|
|
Doc('FUSE_MUL_ADD3_IXxF32xF32xI8', |
|
|
|
Doc('FUSE_MUL_ADD3_IXxF32xF32xI8 = 1', |
|
|
|
'compuate ``a * b + c`` where the inputs ``a`` is an integer type ' |
|
|
|
'``b`` and ``c`` are both ``float32``, the result is ' |
|
|
|
'``int8``. This is currently only optimized for ``(1, x)`` ' |
|
|
|
'broadcast for ``b`` and ``c``. Computation is carried in floating ' |
|
|
|
'points and results are rounded towards zero with saturated cast to ' |
|
|
|
'int.'), |
|
|
|
Doc('ROUND_SHR_SATURATE_IXxI8xI8', |
|
|
|
Doc('ROUND_SHR_SATURATE_IXxI8xI8 = 2', |
|
|
|
'Compute ``a >> b``, round the result according to lower ``b`` bits ' |
|
|
|
'of ``a``` and make a saturating conversion to int8. Where ``a`` should' |
|
|
|
' be an integer tensor and ``b`` should be an int8 scalar.'), |
|
|
|
Doc('FUSE_ADD_RMULH_ROUND_SHR_SATURATE_INT16x16x16x8', |
|
|
|
Doc('FUSE_ADD_RMULH_ROUND_SHR_SATURATE_INT16x16x16x8 = 3', |
|
|
|
'Fused operation of an int16 elemwise add, an int16 rounding multiply ' |
|
|
|
'high and an int16 to int8 rounding right shift with saturation.'), |
|
|
|
Doc('FUSE_ADD_RMULH_ROUND_SHR_SATURATE_INT32x32x32x8', |
|
|
|
Doc('FUSE_ADD_RMULH_ROUND_SHR_SATURATE_INT32x32x32x8 = 4', |
|
|
|
'Fused operation of an int32 elemwise add, an int32 rounding multiply ' |
|
|
|
'high and an int32 to int8 rounding right shift with saturation.'), |
|
|
|
Doc('ROUND_SHR_SATURATE_IXxI8xI16', |
|
|
|
Doc('ROUND_SHR_SATURATE_IXxI8xI16 = 5', |
|
|
|
'Compute ``a >> b``, round the result according to lower ``b`` bits of ' |
|
|
|
'``a``` and make a saturating conversion to int16. Where ``a`` should' |
|
|
|
' be an integer tensor and ``b`` should be an int8 scalar.'), |
|
|
|
Doc('QADD', 'Fused elemwise add two quantized int8 with specified' |
|
|
|
Doc('QADD = 6', 'Fused elemwise add two quantized int8 with specified' |
|
|
|
'output quantized dtype'), |
|
|
|
Doc('QFUSE_ADD_RELU', 'Fused elemwise add two quantized int8 followed' |
|
|
|
Doc('QFUSE_ADD_RELU = 7', 'Fused elemwise add two quantized int8 followed' |
|
|
|
' by ReLU and typecvt to specified dtype'), |
|
|
|
Doc('QMUL', 'Fused elemwise multiply two quantized int8 with specified' |
|
|
|
Doc('QMUL = 8', 'Fused elemwise multiply two quantized int8 with specified' |
|
|
|
'output quantized dtype'), |
|
|
|
Doc('QMIN', 'Fused elemwise min two quantized int8 with specified' |
|
|
|
Doc('QMIN = 9', 'Fused elemwise min two quantized int8 with specified' |
|
|
|
'output quantized dtype'), |
|
|
|
Doc('QMAX', 'quantized: max(x, y), with specified output quantized dtype'), |
|
|
|
Doc('QSUB', 'quantized: x - y'), |
|
|
|
Doc('QTRUE_DIV', 'quantized: x / y'), |
|
|
|
Doc('QFUSE_ADD_SIGMOID', 'quantized: sigmoid(x + y)'), |
|
|
|
Doc('QFUSE_ADD_TANH', 'quantized: tanh(x + y)'), |
|
|
|
Doc('QRELU', 'quantized: x > 0 ? x : 0'), |
|
|
|
Doc('QABS', 'quantized: x > 0 ? x : -x'), |
|
|
|
Doc('QSIGMOID', 'quantized: sigmoid(x)'), |
|
|
|
Doc('QEXP', 'quantized: exp(x)'), |
|
|
|
Doc('QTANH', 'quantized: tanh(x)'), |
|
|
|
Doc('QFUSE_MUL_ADD3', 'quantized: x * y + z'), |
|
|
|
Doc('QFAST_TANH', 'quantized: fast_tanh(x)'), |
|
|
|
Doc('QNEGATE', 'quantized: -x'), |
|
|
|
Doc('QACOS', 'quantized: acos(x)'), |
|
|
|
Doc('QASIN', 'quantized: asin(x)'), |
|
|
|
Doc('QCEIL', 'quantized: ceil(x)'), |
|
|
|
Doc('QCOS', 'quantized: cos(x)'), |
|
|
|
Doc('QEXPM1', 'quantized: expm1(x)'), |
|
|
|
Doc('QFLOOR', 'quantized: floor(x)'), |
|
|
|
Doc('QLOG', 'quantized: log(x)'), |
|
|
|
Doc('QLOG1P', 'quantized: log1p(x)'), |
|
|
|
Doc('QSIN', 'quantized: sin(x)'), |
|
|
|
Doc('QROUND', 'quantized: round(x)'), |
|
|
|
Doc('QERF', 'quantized: erf(x)'), |
|
|
|
Doc('QERFINV', 'quantized: erfinv(x)'), |
|
|
|
Doc('QERFC', 'quantized: erfc(x)'), |
|
|
|
Doc('QERFCINV', 'quantized: erfcinv(x)'), |
|
|
|
Doc('QABS_GRAD', 'quantized: abs_grad'), |
|
|
|
Doc('QFLOOR_DIV', 'quantized floor_div'), |
|
|
|
Doc('QMOD', 'quantized mod'), |
|
|
|
Doc('QSIGMOID_GRAD', 'quantized sigmoid_grad'), |
|
|
|
Doc('QSWITCH_GT0', 'quantized switch_gt0'), |
|
|
|
Doc('QTANH_GRAD', 'quantized tanh_grad'), |
|
|
|
Doc('QLT', 'quantized lt'), |
|
|
|
Doc('QLEQ', 'quantized leq'), |
|
|
|
Doc('QEQ', 'quantized eq'), |
|
|
|
Doc('QPOW', 'quantized pow'), |
|
|
|
Doc('QLOG_SUM_EXP', 'quantized log_sum_exp'), |
|
|
|
Doc('QFAST_TANH_GRAD', 'quantized fast_tanh_grad'), |
|
|
|
Doc('QATAN2', 'quantized atan2'), |
|
|
|
Doc('QCOND_LEQ_MOV', 'quantized cond_leq_mov'), |
|
|
|
Doc('QH_SWISH', 'quantized h_swish'), |
|
|
|
Doc('QFUSE_ADD_H_SWISH', 'quantized h_swish(x+y)'), |
|
|
|
Doc('QH_SWISH_GRAD', 'quantized h_swish_grad') |
|
|
|
Doc('QMAX = 10', 'quantized: max(x, y), with specified output quantized dtype'), |
|
|
|
Doc('QSUB = 11', 'quantized: x - y'), |
|
|
|
Doc('QTRUE_DIV = 12', 'quantized: x / y'), |
|
|
|
Doc('QFUSE_ADD_SIGMOID = 13', 'quantized: sigmoid(x + y)'), |
|
|
|
Doc('QFUSE_ADD_TANH = 14', 'quantized: tanh(x + y)'), |
|
|
|
Doc('QRELU = 15', 'quantized: x > 0 ? x : 0'), |
|
|
|
Doc('QABS = 16', 'quantized: x > 0 ? x : -x'), |
|
|
|
Doc('QSIGMOID = 17', 'quantized: sigmoid(x)'), |
|
|
|
Doc('QEXP = 18', 'quantized: exp(x)'), |
|
|
|
Doc('QTANH = 19', 'quantized: tanh(x)'), |
|
|
|
Doc('QFUSE_MUL_ADD3 = 20', 'quantized: x * y + z'), |
|
|
|
Doc('QFAST_TANH = 21', 'quantized: fast_tanh(x)'), |
|
|
|
Doc('QNEGATE = 22', 'quantized: -x'), |
|
|
|
Doc('QACOS = 23', 'quantized: acos(x)'), |
|
|
|
Doc('QASIN = 24', 'quantized: asin(x)'), |
|
|
|
Doc('QCEIL = 25', 'quantized: ceil(x)'), |
|
|
|
Doc('QCOS = 26', 'quantized: cos(x)'), |
|
|
|
Doc('QEXPM1 = 27', 'quantized: expm1(x)'), |
|
|
|
Doc('QFLOOR = 28', 'quantized: floor(x)'), |
|
|
|
Doc('QLOG = 29', 'quantized: log(x)'), |
|
|
|
Doc('QLOG1P = 30', 'quantized: log1p(x)'), |
|
|
|
Doc('QSIN = 31', 'quantized: sin(x)'), |
|
|
|
Doc('QROUND = 32', 'quantized: round(x)'), |
|
|
|
Doc('QERF = 33', 'quantized: erf(x)'), |
|
|
|
Doc('QERFINV = 34', 'quantized: erfinv(x)'), |
|
|
|
Doc('QERFC = 35', 'quantized: erfc(x)'), |
|
|
|
Doc('QERFCINV = 36', 'quantized: erfcinv(x)'), |
|
|
|
Doc('QABS_GRAD = 37', 'quantized: abs_grad'), |
|
|
|
Doc('QFLOOR_DIV = 38', 'quantized floor_div'), |
|
|
|
Doc('QMOD = 39', 'quantized mod'), |
|
|
|
Doc('QSIGMOID_GRAD = 40', 'quantized sigmoid_grad'), |
|
|
|
Doc('QSWITCH_GT0 = 41', 'quantized switch_gt0'), |
|
|
|
Doc('QTANH_GRAD = 42', 'quantized tanh_grad'), |
|
|
|
Doc('QLT = 43', 'quantized lt'), |
|
|
|
Doc('QLEQ = 44', 'quantized leq'), |
|
|
|
Doc('QEQ = 45', 'quantized eq'), |
|
|
|
Doc('QPOW = 46', 'quantized pow'), |
|
|
|
Doc('QLOG_SUM_EXP = 47', 'quantized log_sum_exp'), |
|
|
|
Doc('QFAST_TANH_GRAD = 48', 'quantized fast_tanh_grad'), |
|
|
|
Doc('QATAN2 = 49', 'quantized atan2'), |
|
|
|
Doc('QCOND_LEQ_MOV = 50', 'quantized cond_leq_mov'), |
|
|
|
Doc('QH_SWISH = 51', 'quantized h_swish'), |
|
|
|
Doc('QFUSE_ADD_H_SWISH = 52', 'quantized h_swish(x+y)'), |
|
|
|
Doc('QH_SWISH_GRAD = 53', 'quantized h_swish_grad') |
|
|
|
) |
|
|
|
|
|
|
|
pdef('PowC', 'power with constant exponent').add_fields('float32', 'exp', 0) |
|
|
|
|
|
|
|
(pdef('DctChannelSelect', '2d discrete cosine transform', version=0, is_legacy=True).add_enum_alias('Format', 'ConvolutionV0'). |
|
|
|
add_enum('FastImpl', 'NONE', 'FIX_32_MASK').add_fields('int32', 'dct_block_size', 8)) |
|
|
|
add_enum('FastImpl', 'NONE = 0', 'FIX_32_MASK = 1').add_fields('int32', 'dct_block_size', 8)) |
|
|
|
|
|
|
|
(pdef('DctChannelSelect', '2d discrete cosine transform', version=1).add_enum_alias('Format', 'Convolution'). |
|
|
|
add_enum_alias('FastImpl', 'DctChannelSelectV0').add_fields('int32', 'dct_block_size', 8)) |
|
|
@@ -510,13 +510,13 @@ pdef('PowC', 'power with constant exponent').add_fields('float32', 'exp', 0) |
|
|
|
(pdef('MatrixMul', version=0, is_legacy=True). |
|
|
|
add_fields('bool', 'transposeA', 'false', 'transposeB', 'false'). |
|
|
|
add_enum('DataType', |
|
|
|
Doc('FLOAT', 'input/output both float32/float16'), |
|
|
|
'INT8x8x16', |
|
|
|
'INT8x8x32', |
|
|
|
Doc('FLOAT_IO16xC32', 'input/output both float16, the internal compute is ' |
|
|
|
Doc('FLOAT = 0', 'input/output both float32/float16'), |
|
|
|
'INT8x8x16 = 1', |
|
|
|
'INT8x8x32 = 2', |
|
|
|
Doc('FLOAT_IO16xC32 = 3', 'input/output both float16, the internal compute is ' |
|
|
|
'float32'), |
|
|
|
Doc('QUINT8x8x32', 'input QuantizedAsymm8, output QuantizedS32'), |
|
|
|
Doc('QUINT4x4x32', 'input QuantizedAsymm4, output QuantizedS32'), |
|
|
|
Doc('QUINT8x8x32 = 4', 'input QuantizedAsymm8, output QuantizedS32'), |
|
|
|
Doc('QUINT4x4x32 = 5', 'input QuantizedAsymm4, output QuantizedS32'), |
|
|
|
name_field='data_type')) |
|
|
|
|
|
|
|
(pdef('MatrixMul', version=1, is_legacy=True). |
|
|
@@ -524,9 +524,9 @@ pdef('PowC', 'power with constant exponent').add_fields('float32', 'exp', 0) |
|
|
|
add_enum(Doc('ComputeMode', 'Specifies special computation modes, e.g. ' |
|
|
|
'different combinations of intermediate result ' |
|
|
|
'data types.'), |
|
|
|
Doc('DEFAULT', 'No special requirements on the precision of ' |
|
|
|
Doc('DEFAULT = 0', 'No special requirements on the precision of ' |
|
|
|
'intermediate results.'), |
|
|
|
Doc('FLOAT32', 'Use Float32 accumulator and intermediate result. ' |
|
|
|
Doc('FLOAT32 = 1', 'Use Float32 accumulator and intermediate result. ' |
|
|
|
'Only supported when input and output is Float16.'), |
|
|
|
name_field='compute_mode')) |
|
|
|
|
|
|
@@ -534,14 +534,14 @@ pdef('PowC', 'power with constant exponent').add_fields('float32', 'exp', 0) |
|
|
|
add_fields('bool', 'transposeA', 'false', 'transposeB', 'false'). |
|
|
|
add_enum_alias('ComputeMode', 'MatrixMulV1', name_field='compute_mode'). |
|
|
|
add_enum('Format', |
|
|
|
Doc('DEFAULT', 'Normal matrix mul: (M, K) x (K, N) = (M, N)'), |
|
|
|
Doc('MK4', 'Split 4 from M and K, better for neon compute:' |
|
|
|
Doc('DEFAULT = 0', 'Normal matrix mul: (M, K) x (K, N) = (M, N)'), |
|
|
|
Doc('MK4 = 1', 'Split 4 from M and K, better for neon compute:' |
|
|
|
'(M/4, K/4, 4(k), 4(m)) x (K/4, N, 4(k)). if transposeA the ' |
|
|
|
'layout is (K/4, M/4, 4(k), 4(m)) x (K/4, N, 4(k))'), |
|
|
|
Doc('MK8', 'Split 8 from M and K, better for neon compute:' |
|
|
|
Doc('MK8 = 2', 'Split 8 from M and K, better for neon compute:' |
|
|
|
'(M/8, K/8, 8(k), 8(m)) x (K/8, N, 8(k)). if transposeA the ' |
|
|
|
'layout is (K/8, M/8, 8(k), 8(m)) x (K/8, N, 8(k))'), |
|
|
|
Doc('MK4_DOT', 'Split 4 from M and K, better for neon dotprod:' |
|
|
|
Doc('MK4_DOT = 3', 'Split 4 from M and K, better for neon dotprod:' |
|
|
|
'M/4, K/4, 4(m), 4(k)) x (K/4, N, 4(k)). if transposeA the ' |
|
|
|
'layout is (K/4, M/4, 4(m), 4(k)) x (K/4, N, 4(k))')) |
|
|
|
) |
|
|
@@ -560,9 +560,9 @@ pdef('PowC', 'power with constant exponent').add_fields('float32', 'exp', 0) |
|
|
|
|
|
|
|
(pdef('Reduce', 'legacy reduce', version=0, is_legacy=True). |
|
|
|
add_enum('Mode', |
|
|
|
'SUM', |
|
|
|
Doc('SUM_SQR', 'sum of x * x for each element x'), |
|
|
|
'PRODUCT', 'MIN', 'MAX'). |
|
|
|
'SUM = 0', |
|
|
|
Doc('SUM_SQR = 1', 'sum of x * x for each element x'), |
|
|
|
'PRODUCT = 2', 'MIN = 3', 'MAX = 4'). |
|
|
|
add_fields('int32', |
|
|
|
Doc('axis', |
|
|
|
'axis along which reduction is performed; if -1 is given, ' |
|
|
@@ -571,16 +571,16 @@ pdef('PowC', 'power with constant exponent').add_fields('float32', 'exp', 0) |
|
|
|
|
|
|
|
(pdef('Reduce', 'reduce along given axis', version=1, is_legacy=True). |
|
|
|
add_enum('Mode', |
|
|
|
'SUM', |
|
|
|
Doc('SUM_SQR', 'sum of x * x for each element x'), |
|
|
|
'PRODUCT', 'MIN', 'MAX', 'MEAN'). |
|
|
|
'SUM = 0', |
|
|
|
Doc('SUM_SQR = 1', 'sum of x * x for each element x'), |
|
|
|
'PRODUCT = 2', 'MIN = 3', 'MAX = 4', 'MEAN = 5'). |
|
|
|
add_fields('int32', |
|
|
|
Doc('axis', |
|
|
|
'axis along which reduction is performed; if -1 is given, ' |
|
|
|
'reduce to given target shape (only used in megbrain)'), |
|
|
|
-1). |
|
|
|
add_enum('DataType', |
|
|
|
Doc('DEFAULT', |
|
|
|
Doc('DEFAULT = 0', |
|
|
|
''' |
|
|
|
input/output are the same data type, and the internal computation type would be chosen by the input/output dtypes and the reduction mode. |
|
|
|
Currently, ```DEFAULT``` mode means: |
|
|
@@ -607,26 +607,26 @@ Currently, ```DEFAULT``` mode means: |
|
|
|
|
|
|
|
''' |
|
|
|
), |
|
|
|
Doc('FLOAT_IO16xC32', 'Deprecated. This was replaced by ' |
|
|
|
Doc('FLOAT_IO16xC32 = 1', 'Deprecated. This was replaced by ' |
|
|
|
'FLOAT_O16xC32, and input\'s dtype decided by actual input tensor.'), |
|
|
|
Doc('FLOAT_O32xC32', 'compute/output both are float32'), |
|
|
|
Doc('FLOAT_O16xC32', 'compute are float32, output float16'), |
|
|
|
Doc('QUINT_I8xO32', 'input quint8, compute and output are qint32'), |
|
|
|
Doc('QINT_I8xO32', 'input qint8, compute and output are qint32'), |
|
|
|
Doc('FLOAT_O32xC32 = 2', 'compute/output both are float32'), |
|
|
|
Doc('FLOAT_O16xC32 = 3', 'compute are float32, output float16'), |
|
|
|
Doc('QUINT_I8xO32 = 4', 'input quint8, compute and output are qint32'), |
|
|
|
Doc('QINT_I8xO32 = 5', 'input qint8, compute and output are qint32'), |
|
|
|
name_field='data_type')) |
|
|
|
|
|
|
|
(pdef('Reduce', 'reduce along given axis', version=2). |
|
|
|
add_enum('Mode', |
|
|
|
'SUM', |
|
|
|
Doc('SUM_SQR', 'sum of x * x for each element x'), |
|
|
|
'PRODUCT', 'MIN', 'MAX', 'MEAN'). |
|
|
|
'SUM = 0', |
|
|
|
Doc('SUM_SQR = 1', 'sum of x * x for each element x'), |
|
|
|
'PRODUCT = 2', 'MIN = 3', 'MAX = 4', 'MEAN = 5'). |
|
|
|
add_fields('int32', |
|
|
|
Doc('axis', |
|
|
|
'axis along which reduction is performed; if INT_MAX is given, ' |
|
|
|
'reduce to given target shape (only used in megbrain)'), |
|
|
|
(1<<31)-1). |
|
|
|
add_enum('DataType', |
|
|
|
Doc('DEFAULT', |
|
|
|
Doc('DEFAULT = 0', |
|
|
|
''' |
|
|
|
input/output are the same data type, and the internal computation type would be chosen by the input/output dtypes and the reduction mode. |
|
|
|
Currently, ```DEFAULT``` mode means: |
|
|
@@ -653,12 +653,12 @@ Currently, ```DEFAULT``` mode means: |
|
|
|
|
|
|
|
''' |
|
|
|
), |
|
|
|
Doc('FLOAT_IO16xC32', 'Deprecated. This was replaced by ' |
|
|
|
Doc('FLOAT_IO16xC32 = 1', 'Deprecated. This was replaced by ' |
|
|
|
'FLOAT_O16xC32, and input\'s dtype decided by actual input tensor.'), |
|
|
|
Doc('FLOAT_O32xC32', 'compute/output both are float32'), |
|
|
|
Doc('FLOAT_O16xC32', 'compute are float32, output float16'), |
|
|
|
Doc('QUINT_I8xO32', 'input quint8, compute and output are qint32'), |
|
|
|
Doc('QINT_I8xO32', 'input qint8, compute and output are qint32'), |
|
|
|
Doc('FLOAT_O32xC32 = 2', 'compute/output both are float32'), |
|
|
|
Doc('FLOAT_O16xC32 = 3', 'compute are float32, output float16'), |
|
|
|
Doc('QUINT_I8xO32 = 4', 'input quint8, compute and output are qint32'), |
|
|
|
Doc('QINT_I8xO32 = 5', 'input qint8, compute and output are qint32'), |
|
|
|
name_field='data_type')) |
|
|
|
|
|
|
|
(pdef('Cumsum', 'calculate accumulated sum along given axis', version=0, is_legacy=True). |
|
|
@@ -691,12 +691,12 @@ Currently, ```DEFAULT``` mode means: |
|
|
|
|
|
|
|
(pdef('CondTake'). |
|
|
|
add_enum('Mode', |
|
|
|
Doc('EQ', 'take if ``abs(data-val)<eps``'), |
|
|
|
Doc('NEQ', 'take if ``abs(data-val)>=eps``'), |
|
|
|
Doc('LT', 'take if ``data<val``'), |
|
|
|
Doc('LEQ', 'take if ``data<=val``'), |
|
|
|
Doc('GT', 'take if ``data>val``'), |
|
|
|
Doc('GEQ', 'take if ``data>=val``')). |
|
|
|
Doc('EQ = 0', 'take if ``abs(data-val)<eps``'), |
|
|
|
Doc('NEQ = 1', 'take if ``abs(data-val)>=eps``'), |
|
|
|
Doc('LT = 2', 'take if ``data<val``'), |
|
|
|
Doc('LEQ = 3', 'take if ``data<=val``'), |
|
|
|
Doc('GT = 4', 'take if ``data>val``'), |
|
|
|
Doc('GEQ = 5', 'take if ``data>=val``')). |
|
|
|
add_fields('float32', |
|
|
|
Doc('val', 'the value to be compared with; note that for integer ' |
|
|
|
'data, val is also converted to int'), 0). |
|
|
@@ -704,7 +704,7 @@ Currently, ```DEFAULT``` mode means: |
|
|
|
1e-6)) |
|
|
|
|
|
|
|
|
|
|
|
pdef('Argsort').add_enum('Order', 'ASCENDING', 'DESCENDING') |
|
|
|
pdef('Argsort').add_enum('Order', 'ASCENDING = 0', 'DESCENDING = 1') |
|
|
|
|
|
|
|
(pdef('IndexingRemap'). |
|
|
|
add_fields('bool', |
|
|
@@ -791,17 +791,17 @@ pdef('Sleep').add_fields('float32', Doc('time', 'time to sleep in seconds'), 0) |
|
|
|
.add_fields('uint32', 'row_from', 0, 'row_to', 0, 'col_from', 0, 'col_to', 0)) |
|
|
|
|
|
|
|
(pdef('CvtColor') |
|
|
|
.add_enum('Mode', 'RGB2GRAY', 'RGB2YUV', 'YUV2RGB', 'GRAY2RGB', 'RGBA2RGB', |
|
|
|
'RGBA2BGR', 'RGBA2GRAY', 'RGB2BGR', 'BGR2GRAY', 'BGR2RGB', |
|
|
|
Doc('YUV2GRAY_NV21', 'For historical reasons, referred to as YCC by opencv'), |
|
|
|
'YUV2RGB_NV21', 'YUV2BGR_NV21', 'YUV2GRAY_NV12', 'YUV2RGB_NV12', |
|
|
|
'YUV2BGR_NV12', 'YUV2GRAY_YV12', 'YUV2RGB_YV12', 'YUV2BGR_YV12', |
|
|
|
'YUV2GRAY_YU12', 'YUV2RGB_YU12', 'YUV2BGR_YU12', |
|
|
|
'YCrCb2RGB', 'YCrCb2BGR', |
|
|
|
Doc('BT601_YUV2RGB_NV21', 'BT601 yuv format, referred to as YUV by opencv'), |
|
|
|
'BT601_YUV2BGR_NV21', 'BT601_YUV2RGB_NV12', 'BT601_YUV2BGR_NV12', |
|
|
|
'BT601_YUV2RGB_YV12', 'BT601_YUV2BGR_YV12', 'BT601_YUV2RGB_YU12', |
|
|
|
'BT601_YUV2BGR_YU12', |
|
|
|
.add_enum('Mode', 'RGB2GRAY = 0', 'RGB2YUV = 1', 'YUV2RGB = 2', 'GRAY2RGB = 3', 'RGBA2RGB = 4', |
|
|
|
'RGBA2BGR = 5', 'RGBA2GRAY = 6', 'RGB2BGR = 7', 'BGR2GRAY = 8', 'BGR2RGB = 9', |
|
|
|
Doc('YUV2GRAY_NV21 = 10', 'For historical reasons, referred to as YCC by opencv'), |
|
|
|
'YUV2RGB_NV21 = 11', 'YUV2BGR_NV21 = 12', 'YUV2GRAY_NV12 = 13', 'YUV2RGB_NV12 = 14', |
|
|
|
'YUV2BGR_NV12 = 15', 'YUV2GRAY_YV12 = 16', 'YUV2RGB_YV12 = 17', 'YUV2BGR_YV12 = 18', |
|
|
|
'YUV2GRAY_YU12 = 19', 'YUV2RGB_YU12 = 20', 'YUV2BGR_YU12 = 21', |
|
|
|
'YCrCb2RGB = 22', 'YCrCb2BGR = 23', |
|
|
|
Doc('BT601_YUV2RGB_NV21 = 24', 'BT601 yuv format, referred to as YUV by opencv'), |
|
|
|
'BT601_YUV2BGR_NV21 = 25', 'BT601_YUV2RGB_NV12 = 26', 'BT601_YUV2BGR_NV12 = 27', |
|
|
|
'BT601_YUV2RGB_YV12 = 28', 'BT601_YUV2BGR_YV12 = 29', 'BT601_YUV2RGB_YU12 = 30', |
|
|
|
'BT601_YUV2BGR_YU12 = 31', |
|
|
|
member_alias=[('YUV2GRAY_NV21', 'BT601_YUV2GRAY_NV21'), |
|
|
|
('YUV2GRAY_NV12', 'BT601_YUV2GRAY_NV12'), |
|
|
|
('YUV2GRAY_YV12', 'BT601_YUV2GRAY_YV12'), |
|
|
@@ -855,7 +855,7 @@ pdef('Sleep').add_fields('float32', Doc('time', 'time to sleep in seconds'), 0) |
|
|
|
.add_fields('float32', 'scalar', '0.f')) |
|
|
|
|
|
|
|
(pdef('Convolution3D'). |
|
|
|
add_enum('Mode', 'CROSS_CORRELATION', 'CONVOLUTION'). |
|
|
|
add_enum('Mode', 'CROSS_CORRELATION = 0', 'CONVOLUTION = 1'). |
|
|
|
add_fields( |
|
|
|
'uint32', |
|
|
|
Doc('pad_d', 'padding on one side on the first dimension'), 0, |
|
|
@@ -872,32 +872,32 @@ pdef('Sleep').add_fields('float32', Doc('time', 'time to sleep in seconds'), 0) |
|
|
|
'on the third dimension'), 1 |
|
|
|
). |
|
|
|
add_enum('Sparse', |
|
|
|
Doc('DENSE', 'dense convolution: filter shape should be ' |
|
|
|
Doc('DENSE = 0', 'dense convolution: filter shape should be ' |
|
|
|
'[oc, ic, spatial...] if format is NCDHW, ' |
|
|
|
'[oc, spatial..., ic] if format is NDHWC'), |
|
|
|
Doc('GROUP', 'group convolution: filter shape should be ' |
|
|
|
Doc('GROUP = 1', 'group convolution: filter shape should be ' |
|
|
|
'[group, oc_per_group, ic_per_group, spatial...] if format is NCDHW, ' |
|
|
|
'[group, oc_per_group, spatial..., ic_per_group] if format is NDHWC') |
|
|
|
). |
|
|
|
add_enum('DataType', |
|
|
|
Doc('FLOAT', 'input/output both float32/float16'), |
|
|
|
Doc('FLOAT_IO16xC32', 'input/output both float16, the internal ' |
|
|
|
Doc('FLOAT = 0', 'input/output both float32/float16'), |
|
|
|
Doc('FLOAT_IO16xC32 = 1', 'input/output both float16, the internal ' |
|
|
|
'compute is float32'), |
|
|
|
name_field='data_type'). |
|
|
|
add_enum('Format', 'NCDHW', 'NDHWC') |
|
|
|
add_enum('Format', 'NCDHW = 0', 'NDHWC = 1') |
|
|
|
) |
|
|
|
|
|
|
|
(pdef('Conv3DBias'). |
|
|
|
add_enum('NonlineMode', 'IDENTITY', 'RELU', 'SIGMOID'). |
|
|
|
add_enum('NonlineMode', 'IDENTITY = 0', 'RELU = 1', 'SIGMOID = 2'). |
|
|
|
add_enum_alias('Mode', 'Convolution3D'). |
|
|
|
add_fields('uint32', 'pad_d', 0, 'pad_h', 0, 'pad_w', 0, |
|
|
|
'stride_d', 1, 'stride_h', 1, 'stride_w', 0)) |
|
|
|
|
|
|
|
(pdef('SeparableConv3D'). |
|
|
|
add_enum_alias('Mode', 'Convolution3D'). |
|
|
|
add_enum('BorderMode', 'BORDER_REPLICATE', 'BORDER_REFLECT', |
|
|
|
'BORDER_REFLECT_101','BORDER_WRAP', |
|
|
|
'BORDER_CONSTANT', 'BORDER_TRANSPARENT','BORDER_ISOLATED'). |
|
|
|
add_enum('BorderMode', 'BORDER_REPLICATE = 0', 'BORDER_REFLECT = 1', |
|
|
|
'BORDER_REFLECT_101 = 2','BORDER_WRAP = 3', |
|
|
|
'BORDER_CONSTANT = 4', 'BORDER_TRANSPARENT = 5','BORDER_ISOLATED = 6'). |
|
|
|
add_fields('bool', 'is_symm_kernel', 'true'). |
|
|
|
add_fields('uint32', 'pad_d', 0, 'pad_h', 0, 'pad_w', 0, |
|
|
|
'stride_d', 0, 'stride_h', 1, 'stride_w', 1, |
|
|
@@ -907,11 +907,11 @@ pdef('Sleep').add_fields('float32', Doc('time', 'time to sleep in seconds'), 0) |
|
|
|
(pdef('TopK'). |
|
|
|
add_enum( |
|
|
|
'Mode', |
|
|
|
Doc('KTH_ONLY', "only the value of the k'th element would be computed"), |
|
|
|
Doc('VALUE_IDX_NOSORT', |
|
|
|
Doc('KTH_ONLY = 0', "only the value of the k'th element would be computed"), |
|
|
|
Doc('VALUE_IDX_NOSORT = 1', |
|
|
|
'all the top-k values and corresponding indices would be computed; ' |
|
|
|
'no order is guaranteed'), |
|
|
|
Doc('VALUE_IDX_SORTED', |
|
|
|
Doc('VALUE_IDX_SORTED = 2', |
|
|
|
'all the top-k values and corresponding indices sorted')) |
|
|
|
) |
|
|
|
|
|
|
@@ -983,37 +983,37 @@ Note: NCHW_NCHW4_WEIGHT will auto pad oc and ic, you should remove oc in later o |
|
|
|
(pdef('RelayoutFormat', 'Change the tensor layout format', version=0, is_legacy=True). |
|
|
|
add_enum( |
|
|
|
Doc('Mode', RELAYOUT_FORMAT_MODE_DOC), |
|
|
|
'NHWC_NHWCD4', |
|
|
|
'NHWCD4_NHWC', |
|
|
|
'NHWC_NHWCD4I', |
|
|
|
'NCHW_NHWCD4', |
|
|
|
'NCHW_NHWCD4I', |
|
|
|
'NHWCD4I_NCHW', |
|
|
|
'NHWCD4_NCHW', |
|
|
|
'INTER_WEIGHT_DENSE', |
|
|
|
'INTER_WEIGHT_DENSEI', |
|
|
|
'INTER_WEIGHT_GROUP', |
|
|
|
'INTER_WEIGHT_GROUPI', |
|
|
|
'INTER_WEIGHT_CHAN', |
|
|
|
'INTER_WEIGHT_CHANI', |
|
|
|
'INTER_WEIGHT_DENSEI_DOT', |
|
|
|
'INTER_WEIGHT_GROUPI_DOT', |
|
|
|
'NCHW4_CHWN4', |
|
|
|
'CHWN4_NCHW4', |
|
|
|
'NCHW_NCHW88_CONV_DENSE_WEIGHT', |
|
|
|
'NCHW_NCHW88_CONV_CHAN_WEIGHT', |
|
|
|
'NCHW_NCHW88_CONV_GROUP_WEIGHT', |
|
|
|
'NCHW_NCHW88', |
|
|
|
'NCHW88_NCHW', |
|
|
|
'NCHW_NCHW4_IC_SMALL', |
|
|
|
'NCHW_NCHW4_IC_SMALL_CONV_DENSE_WEIGHT', |
|
|
|
'NCHW_NCHW4', |
|
|
|
'NCHW4_NCHW', |
|
|
|
'NCHW_NCHW4_WEIGHT', |
|
|
|
'NCHW_NCHW64', |
|
|
|
'NCHW64_NCHW', |
|
|
|
'NCHW_NHWC', |
|
|
|
'NHWC_NCHW', |
|
|
|
'NHWC_NHWCD4 = 0', |
|
|
|
'NHWCD4_NHWC = 1', |
|
|
|
'NHWC_NHWCD4I = 2', |
|
|
|
'NCHW_NHWCD4 = 3', |
|
|
|
'NCHW_NHWCD4I = 4', |
|
|
|
'NHWCD4I_NCHW = 5', |
|
|
|
'NHWCD4_NCHW = 6', |
|
|
|
'INTER_WEIGHT_DENSE = 7', |
|
|
|
'INTER_WEIGHT_DENSEI = 8', |
|
|
|
'INTER_WEIGHT_GROUP = 9', |
|
|
|
'INTER_WEIGHT_GROUPI = 10', |
|
|
|
'INTER_WEIGHT_CHAN = 11', |
|
|
|
'INTER_WEIGHT_CHANI = 12', |
|
|
|
'INTER_WEIGHT_DENSEI_DOT = 13', |
|
|
|
'INTER_WEIGHT_GROUPI_DOT = 14', |
|
|
|
'NCHW4_CHWN4 = 15', |
|
|
|
'CHWN4_NCHW4 = 16', |
|
|
|
'NCHW_NCHW88_CONV_DENSE_WEIGHT = 17', |
|
|
|
'NCHW_NCHW88_CONV_CHAN_WEIGHT = 18', |
|
|
|
'NCHW_NCHW88_CONV_GROUP_WEIGHT = 19', |
|
|
|
'NCHW_NCHW88 = 20', |
|
|
|
'NCHW88_NCHW = 21', |
|
|
|
'NCHW_NCHW4_IC_SMALL = 22', |
|
|
|
'NCHW_NCHW4_IC_SMALL_CONV_DENSE_WEIGHT = 23', |
|
|
|
'NCHW_NCHW4 = 24', |
|
|
|
'NCHW4_NCHW = 25', |
|
|
|
'NCHW_NCHW4_WEIGHT = 26', |
|
|
|
'NCHW_NCHW64 = 27', |
|
|
|
'NCHW64_NCHW = 28', |
|
|
|
'NCHW_NHWC = 29', |
|
|
|
'NHWC_NCHW = 30', |
|
|
|
) |
|
|
|
) |
|
|
|
|
|
|
@@ -1077,7 +1077,7 @@ Note: NCHW_NCHW4_WEIGHT will auto pad oc and ic, you should remove oc in later o |
|
|
|
|
|
|
|
|
|
|
|
(pdef('ROIAlign',version=0,is_legacy=True). |
|
|
|
add_enum('Mode', 'MAX', 'AVERAGE', name_field='mode'). |
|
|
|
add_enum('Mode', 'MAX = 0', 'AVERAGE = 1', name_field='mode'). |
|
|
|
add_enum_alias('Format', 'ConvolutionV0'). |
|
|
|
add_fields('float32', 'spatial_scale', '1.0'). |
|
|
|
add_fields('float32', 'offset', '0.0'). |
|
|
@@ -1173,9 +1173,9 @@ Note: NCHW_NCHW4_WEIGHT will auto pad oc and ic, you should remove oc in later o |
|
|
|
pdef('Fill').add_fields('float32', 'value', '0') |
|
|
|
|
|
|
|
|
|
|
|
PADDING_MODES = [Doc('REPLICATE', 'aaaaaa|abcdefgh|hhhhhhh'), |
|
|
|
Doc('REFLECT', 'fedcba|abcdefgh|hgfedcb'), |
|
|
|
Doc('CONSTANT', 'iiiiii|abcdefgh|iiiiiii')] |
|
|
|
PADDING_MODES = [Doc('REPLICATE = 0', 'aaaaaa|abcdefgh|hhhhhhh'), |
|
|
|
Doc('REFLECT = 1', 'fedcba|abcdefgh|hgfedcb'), |
|
|
|
Doc('CONSTANT = 2', 'iiiiii|abcdefgh|iiiiiii')] |
|
|
|
(pdef('Padding'). |
|
|
|
add_fields('uint32', Doc('front_offset_dim0','offset in dim 0'), 0). |
|
|
|
add_fields('uint32', Doc('front_offset_dim1','offset in dim 1'), 0). |
|
|
|