- // generated by gen_param_defs.py for c23d51f3c4f33119fd74f58f04d112ccea8f64f1249ab372300975ab7e710e9a
- include "dtype.fbs";
- namespace mgb.serialization.fbs.param;
-
- /// mode of collective communication
- enum CollectiveCommMode : uint {
- /// reduce by sum to output computing node
- REDUCE_SUM = 0,
- /// copy input value to each output computing node
- BROADCAST = 1,
- /// each output comp node gets the concatenated value of all inputs
- ALL_GATHER = 2,
- /// reduce inputs by sum and each output gets one part of it
- REDUCE_SCATTER_SUM = 3,
- /// every output gets the sum of all inputs
- ALL_REDUCE_SUM = 4,
- /// every output gets the max of all inputs
- ALL_REDUCE_MAX = 5,
- /// every output gets the min of all inputs
- ALL_REDUCE_MIN = 6,
- /// every output gets the prod of all inputs
- ALL_REDUCE_PROD = 7,
- /// concat inputs to one node
- GATHER = 8,
- /// scatter input to each output computing node
- SCATTER = 9,
- /// scatter inputs and gather them on each computing node
- ALL_TO_ALL = 10,
- }
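
Taken together, these modes describe different data movements between the participating computing nodes. The following is a minimal NumPy sketch of what a few of them compute, with per-node tensors modeled as a plain Python list; it only illustrates the intended semantics and is not MegBrain's implementation.

import numpy as np

# One entry per computing node; each node holds a 3-element tensor.
inputs = [np.array([1., 2., 3.]), np.array([4., 5., 6.]), np.array([7., 8., 9.])]

# ALL_REDUCE_SUM: every node ends up with the element-wise sum of all inputs.
all_reduce_sum = [sum(inputs) for _ in inputs]

# ALL_GATHER: every node ends up with the concatenation of all inputs.
all_gather = [np.concatenate(inputs) for _ in inputs]

# REDUCE_SCATTER_SUM: the summed tensor is split evenly, one chunk per node.
reduce_scatter_sum = np.array_split(sum(inputs), len(inputs))

print(all_reduce_sum[0])      # [12. 15. 18.]
print(all_gather[0])          # [1. 2. 3. 4. 5. 6. 7. 8. 9.]
print(reduce_scatter_sum[0])  # [12.]  (chunk owned by the first node)
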
-
- /// mode for computing the gradient
- enum CondExecMarkGradMode : uint {
- /// normal gradient mode: sum all the activated components
- SUM = 0,
- /// use :attr:`CondExecMerge.SUM_COND_OUT` mode so oprs that depend on the
- /// gradient opr would not be executed if the forward var is not used.
- SUM_COND_OUT = 1,
- }
-
- /// static inference option. **Note:** This is a workaround: since
- /// currently static inference in MegBrain does not take conditional
- /// execution into account, this option can be used to bypass static
- /// inference errors. This is currently only used by automatically
- /// generated gradient oprs.
- enum CondExecMarkStaticInfer : uint {
- /// enable both shape and value inference
- SHAPE_VALUE = 0,
- /// only enable shape inference (disable value inference)
- SHAPE_ONLY = 1,
- /// disable both shape and value inference
- NONE = 2,
- }
-
- enum CondExecMergeMode : uint {
- /// copy the var whose mask is activated to the output, requiring that
- /// exactly one branch is active
- EXACT_ONE = 0,
- /// like :attr:`EXACT_ONE` with the requirement that all branches have the
- /// same shape, so shape inference can be easier
- EXACT_ONE_SAME_SHAPE = 1,
- /// sum all the active branches into output var; require all branches to
- /// have the same shape. Extra shape vars are needed in this mode, so the
- /// outputs can be initialized to zero when no input is active (and their
- /// shapes are probably unknown).
- SUM = 2,
- /// like :attr:`SUM` but also add an ExecutionMask to the readers of output
- /// vars, so they would be skipped if no branch is taken
- SUM_COND_OUT = 3,
- }
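
As a rough illustration of the merge semantics, the sketch below models each branch as an (active, value) pair; the helper names are hypothetical and only mirror the EXACT_ONE and SUM descriptions above.

import numpy as np

def merge_exact_one(branches):
    # EXACT_ONE: exactly one branch must be active; copy its value through.
    active = [value for on, value in branches if on]
    assert len(active) == 1, "EXACT_ONE requires exactly one active branch"
    return active[0]

def merge_sum(branches, shape):
    # SUM: add up the active branches; with none active the output is
    # zero-filled, which is why the extra shape vars are needed.
    out = np.zeros(shape)
    for on, value in branches:
        if on:
            out += value
    return out

branches = [(False, np.ones(2)), (True, np.full(2, 3.0))]
print(merge_exact_one(branches))                      # [3. 3.]
print(merge_sum(branches, shape=(2,)))                # [3. 3.]
print(merge_sum([(False, np.ones(2))], shape=(2,)))   # [0. 0.]
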
-
- /// how to compare predicate var with branch keys
- enum CondExecPredMode : uint {
- /// The outputs correspond to branch keys, and the one whose key equals the
- /// predicate would be activated. This behaves like a case statement in many
- /// languages.
- CASE = 0,
- /// like :attr:`CASE`, but add an extra output that would be activated if no
- /// branch is matched
- CASE_FALLBACK = 1,
- /// One more output would be produced than the number of branch keys,
- /// representing the interval in which the predicate var falls. The
- /// intervals are defined as :math:`(-\infty, k_0), [k_0, k_1), \ldots,
- /// [k_{n-2}, k_{n-1}), [k_{n-1}, \infty)`. The keys must be given in
- /// ascending order.
- PIECEWISE = 2,
- }
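
The branch-selection rules above reduce to a few lines of Python. In the sketch below the helpers are illustrative only; the eps tolerance mirrors the CondExecPred.eps field defined later in this file.

import bisect

def case(pred, keys, eps=1e-4, fallback=False):
    # CASE: one output per key, activated when the predicate matches the key.
    mask = [abs(pred - k) < eps for k in keys]
    if fallback:
        # CASE_FALLBACK: one extra output, active when nothing matched.
        mask.append(not any(mask))
    return mask

def piecewise(pred, keys):
    # PIECEWISE: keys must be ascending; the n + 1 outputs correspond to the
    # intervals (-inf, k_0), [k_0, k_1), ..., [k_{n-1}, inf).
    idx = bisect.bisect_right(keys, pred)
    return [i == idx for i in range(len(keys) + 1)]

print(case(1.0, [0.0, 1.0, 2.0]))                 # [False, True, False]
print(case(5.0, [0.0, 1.0, 2.0], fallback=True))  # [False, False, False, True]
print(piecewise(1.5, [0.0, 1.0, 2.0]))            # [False, False, True, False]
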
-
- enum CondExecPredLogicalMode : uint {
- /// logical or
- OR = 0,
- /// logical and
- AND = 1,
- /// exclusive-or
- XOR = 2,
- /// not or(inputs)
- NOR = 3,
- /// not and(inputs)
- NAND = 4,
- /// not xor(inputs)
- XNOR = 5,
- }
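
These six modes are ordinary boolean reductions over the input PPV states, as the plain-Python illustration below shows (the helper is not part of MegBrain).

from functools import reduce
from operator import xor

def pred_logical(mode, inputs):
    return {
        "OR":   any(inputs),
        "AND":  all(inputs),
        "XOR":  reduce(xor, inputs, False),
        "NOR":  not any(inputs),
        "NAND": not all(inputs),
        "XNOR": not reduce(xor, inputs, False),
    }[mode]

print(pred_logical("XOR", [True, True, False]))   # False
print(pred_logical("NAND", [True, True, False]))  # True
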
-
- enum ExecutionPolicyStrategy : uint (bit_flags) {
- /// use heuristic to choose the fastest algorithm
- HEURISTIC = 0,
- /// run possible algorithms on real device to find the best
- PROFILE = 1,
- /// when using profiling or heuristic algorithm selection, require the
- /// chosen algorithm to be reproducible
- REPRODUCIBLE = 2,
- /// when profiling, only consider optimized algorithms, so that profiling
- /// itself runs faster
- OPTIMIZED = 3,
- }
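
The FlatBuffers bit_flags attribute turns each enumerator above into a single bit (1 << its declared value), so strategies can be combined. A small sketch of the resulting flag values (plain Python, not generated code):

HEURISTIC, PROFILE, REPRODUCIBLE, OPTIMIZED = (1 << i for i in range(4))

combo = PROFILE | REPRODUCIBLE  # profile, but only reproducible algorithms
print(HEURISTIC, PROFILE, REPRODUCIBLE, OPTIMIZED)  # 1 2 4 8
print(combo)                                        # 6
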
-
- enum ExecutionPolicyV0Strategy : uint {
- /// use heuristic to choose the fastest algorithm
- HEURISTIC = 0,
- /// use heuristic to choose the fastest algorithm, and the chosen algorithm
- /// is reproducible
- HEURISTIC_REPRODUCIBLE = 1,
- /// run possible algorithms on real device to find the best
- PROFILE = 2,
- /// the fastest of profile result that is also reproducible
- PROFILE_REPRODUCIBLE = 3,
- /// use profile result and heuristic to choose the fastest algorithm
- PROFILE_HEURISTIC = 4,
- }
-
- table DType {
- dtype:DTypeEnum = Byte;
- }
-
- table PersistentOutputStorage {
- /// This is used for controlling memory sharing. Multiple
- /// ``PersistentOutputStorage'' oprs with the same ``share_key'' would share
- /// underlying tensor storage. Note that the value ``-1'' is treated
- /// specially: storage of oprs with this key would be private and would not
- /// be shared with any other opr.
- share_key:int = -1;
- }
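
To make the share_key rule concrete, here is a hypothetical allocator sketch: oprs passing the same non-negative key reuse one buffer, while -1 always yields a private one. The pool and function below are stand-ins, not MegBrain internals.

import numpy as np

_pool = {}

def alloc_storage(share_key, nbytes):
    if share_key == -1:                   # -1: storage is always private
        return np.empty(nbytes, dtype=np.uint8)
    buf = _pool.get(share_key)
    if buf is None or buf.size < nbytes:  # (re)allocate the shared buffer
        buf = np.empty(nbytes, dtype=np.uint8)
        _pool[share_key] = buf
    return buf

a = alloc_storage(3, 64)
b = alloc_storage(3, 64)
c = alloc_storage(-1, 64)
print(a is b, a is c)  # True False
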
-
- /// optional axis: axis == -1 means no axis
- table OptionalAxis {
- axis:int = -1;
- }
-
- /// optional axis: axis == MAX_NDIM means no axis
- table OptionalAxisV1 {
- axis:int = 7;
- }
-
- table ExecutionPolicyV0 {
- strategy:ExecutionPolicyV0Strategy = HEURISTIC;
- /// workspace limit in bytes
- workspace_limit:ulong = 18446744073709551615;
- }
-
- /// specify how to select an algorithm for an operator
- table ExecutionPolicy {
- strategy:ExecutionPolicyStrategy = 1;
- /// workspace limit in bytes
- workspace_limit:ulong = 18446744073709551615;
- }
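
The default workspace_limit is the largest value representable in a 64-bit unsigned integer, which reads as "no effective limit":

# 18446744073709551615 is UINT64_MAX, i.e. the workspace size is unconstrained.
assert 18446744073709551615 == 2**64 - 1
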
-
- table AssertEqual {
- /// max allowed error; error is defined as the minimum of absolute and
- /// relative error
- maxerr:float = 0.0001;
- /// whether to print maxerr to stdout during opr exec
- verbose:bool = false;
- }
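
A sketch of the check implied by maxerr, with the error taken element-wise as the smaller of the absolute and relative error; the exact relative-error denominator used by MegBrain is an assumption here.

import numpy as np

def assert_equal(expect, actual, maxerr=1e-4):
    abs_err = np.abs(expect - actual)
    rel_err = abs_err / np.maximum(np.abs(expect), 1e-12)  # assumed denominator
    err = np.minimum(abs_err, rel_err)
    assert np.all(err <= maxerr), f"max error {err.max():.3g} exceeds {maxerr}"

assert_equal(np.array([1.0, 1000.0]), np.array([1.00005, 1000.05]))
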
-
- table FpgaConv {
- need_output_quantize:bool = false;
- need_output_threshold:bool = false;
- stride:int = 1;
- input_bit_width:int = 2;
- output_bit_width:int = 2;
- weight_bit_width:int = 2;
- thres0:int = 0;
- thres1:int = 1;
- unpool_size:uint = 4;
- direct_size:uint = 4;
- }
-
- /// collective communication between multiple computing nodes on localhost
- table CollectiveComm {
- /// mode of collective communication
- mode:CollectiveCommMode = REDUCE_SUM;
- }
-
- /// HACK: The tag of this param def is actually used for another non-generated
- /// param def SerializedDType, the sole purpose of this param def is to provide
- /// a spare tag. Do not use.
- table FakeSerializedDType {
- }
-
- /// evaluate a predicate and branch keys to setup ExecutionMask objects with
- /// associated predicate proxy vars (PPVs)
- table CondExecPred {
- /// how to compare predicate var with branch keys
- mode:CondExecPredMode = CASE;
- /// threshold for checking equality of floating-point values
- eps:float = 0.0001;
- }
-
- /// compute a logical function over a set of PPVs
- table CondExecPredLogical {
- mode:CondExecPredLogicalMode = OR;
- }
-
- /// add ExecutionMask of the input PPV to this opr and readers of the outputs of
- /// this opr
- table CondExecMark {
- /// mode for computing the gradient
- grad_mode:CondExecMarkGradMode = SUM;
- /// static inference option. **Note:** This is a workaround: since
- /// currently static inference in MegBrain does not take conditional
- /// execution into account, this option can be used to bypass static
- /// inference errors. This is currently only used by automatically
- /// generated gradient oprs.
- static_infer:CondExecMarkStaticInfer = SHAPE_VALUE;
- }
-
- /// merge multiple conditional execution branches
- table CondExecMerge {
- /// number of output vars (i.e. vars per branch)
- nr_output:uint = 1;
- mode:CondExecMergeMode = EXACT_ONE;
- }
-
- /// opr implementing the NVIDIA Optical Flow SDK
- table NvOf {
- precision:uint = 1;
- }
-
-