@@ -268,6 +268,9 @@ const std::string ENABLE_SMALL_CHANNEL = "ge.enableSmallChannel"; | |||||
// Configure Compress Weight flag | // Configure Compress Weight flag | ||||
const std::string ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight"; | const std::string ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight"; | ||||
// Configure Sparse Matrix Weight flag | |||||
const std::string ENABLE_SPARSE_MATRIX_WEIGHT = "ge.enableSparseMatrixWeight"; | |||||
// Configure fusion switch file path | // Configure fusion switch file path | ||||
const std::string FUSION_SWITCH_FILE = "ge.fusionSwitchFile"; | const std::string FUSION_SWITCH_FILE = "ge.fusionSwitchFile"; | ||||
@@ -289,6 +292,10 @@ const char_t *const ENABLE_PRINT_OP_PASS = "ge.enablePrintOpPass"; | |||||
// Its value should be file path, default value is "./" | // Its value should be file path, default value is "./" | ||||
const char_t *const DEBUG_DIR = "ge.debugDir"; | const char_t *const DEBUG_DIR = "ge.debugDir"; | ||||
// Configure switch for op status check such as overflow | |||||
// Its value should be true of flase | |||||
const char_t *const STATUS_CHECK = "ge.status_check"; | |||||
// Configure operator compiler cache path | // Configure operator compiler cache path | ||||
// Its value should be file path, default value is "./" | // Its value should be file path, default value is "./" | ||||
const char_t *const OP_COMPILER_CACHE_DIR = "ge.op_compiler_cache_dir"; | const char_t *const OP_COMPILER_CACHE_DIR = "ge.op_compiler_cache_dir"; | ||||
@@ -411,6 +418,7 @@ static const char_t *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str() | |||||
static const char_t *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); | static const char_t *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); | ||||
static const char_t *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); | static const char_t *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); | ||||
static const char_t *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str(); | static const char_t *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str(); | ||||
static const char_t *const SPARSITY = ge::ENABLE_SPARSE_MATRIX_WEIGHT.c_str(); | |||||
static const char_t *const COMPRESS_WEIGHT_CONF = "compress_weight_conf"; | static const char_t *const COMPRESS_WEIGHT_CONF = "compress_weight_conf"; | ||||
static const char_t *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str(); | static const char_t *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str(); | ||||
static const char_t *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str(); | static const char_t *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str(); | ||||
@@ -469,6 +477,7 @@ const std::set<std::string> global_options = {CORE_TYPE, | |||||
BUFFER_OPTIMIZE, | BUFFER_OPTIMIZE, | ||||
ENABLE_COMPRESS_WEIGHT, | ENABLE_COMPRESS_WEIGHT, | ||||
COMPRESS_WEIGHT_CONF, | COMPRESS_WEIGHT_CONF, | ||||
SPARSITY, | |||||
PRECISION_MODE, | PRECISION_MODE, | ||||
TUNE_DEVICE_IDS, | TUNE_DEVICE_IDS, | ||||
EXEC_DISABLE_REUSED_MEMORY, | EXEC_DISABLE_REUSED_MEMORY, | ||||
@@ -61,7 +61,8 @@ typedef enum { | |||||
* @brief handle to HCCL communicator | * @brief handle to HCCL communicator | ||||
*/ | */ | ||||
typedef void *HcclComm; | typedef void *HcclComm; | ||||
typedef void *HcclMessage; | |||||
typedef void *HcclRequest; | |||||
/** | /** | ||||
* @brief HCCL Reduction opperation | * @brief HCCL Reduction opperation | ||||
*/ | */ | ||||
@@ -87,8 +88,15 @@ typedef enum { | |||||
HCCL_DATA_TYPE_RESERVED /**< reserved */ | HCCL_DATA_TYPE_RESERVED /**< reserved */ | ||||
} HcclDataType; | } HcclDataType; | ||||
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||||
typedef struct { | |||||
int srcRank; // 接收/探测到的msg/信封的发送端rank_id,MPI标准定义,调用者可以访问 | |||||
int tag; // 接收/探测到的msg/信封的tag,MPI标准定义,调用者可以访问 | |||||
int error; // 接收/探测的错误码0:no error,others:传输过程出错,MPI标准定义,调用者可以访问 | |||||
int cancelled; // 指定实现,不建议调用者访问 | |||||
int count; // 接收/探测到的payload大小,指定实现,不建议调用者访问 | |||||
} HcclStatus; | |||||
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||||
/** | /** | ||||
* @brief HCCL root info | * @brief HCCL root info | ||||
*/ | */ | ||||
@@ -96,6 +104,7 @@ typedef struct HcclRootInfoDef { | |||||
char internal[HCCL_ROOT_INFO_BYTES]; | char internal[HCCL_ROOT_INFO_BYTES]; | ||||
} HcclRootInfo; | } HcclRootInfo; | ||||
#define HCCL_REQUEST_NULL NULL | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } | ||||
#endif // __cplusplus | #endif // __cplusplus | ||||
@@ -92,7 +92,7 @@ constexpr size_t kNumTaskWithAtomicAddrCleanTask = 2U; | |||||
// dynamic execute mode | // dynamic execute mode | ||||
const char_t *const kLazyRecompile = "lazy_recompile"; | const char_t *const kLazyRecompile = "lazy_recompile"; | ||||
constexpr size_t kMaxHostMemInputLen = 64U; | |||||
constexpr size_t kMaxHostMemInputLen = 128U; // 64 aligned | |||||
// Data cache, including data address and length | // Data cache, including data address and length | ||||
struct DataBuffer { | struct DataBuffer { | ||||
@@ -35,8 +35,8 @@ class GE_FUNC_VISIBILITY ModelHelper { | |||||
Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file, | Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file, | ||||
ge::ModelBufferData &model) const; | ge::ModelBufferData &model) const; | ||||
Status SaveToOmRootModel(const GeRootModelPtr &ge_root_model, const SaveParam &save_param, | Status SaveToOmRootModel(const GeRootModelPtr &ge_root_model, const SaveParam &save_param, | ||||
const std::string &output_file, ModelBufferData &model, const bool is_unknown_shape); | |||||
Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file); | |||||
const std::string &output_file, ModelBufferData &model, const bool is_unknown_shape) const; | |||||
Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file) const; | |||||
Status LoadModel(const ge::ModelData &model_data); | Status LoadModel(const ge::ModelData &model_data); | ||||
Status LoadRootModel(const ge::ModelData &model_data); | Status LoadRootModel(const ge::ModelData &model_data); | ||||
static void SetModelToGeModel(GeModelPtr &ge_model, Model &model); | static void SetModelToGeModel(GeModelPtr &ge_model, Model &model); | ||||
@@ -68,13 +68,13 @@ class GE_FUNC_VISIBILITY ModelHelper { | |||||
Status GenerateGeRootModel(OmFileLoadHelper &om_load_helper); | Status GenerateGeRootModel(OmFileLoadHelper &om_load_helper); | ||||
Status LoadModelData(OmFileLoadHelper &om_load_helper); | Status LoadModelData(OmFileLoadHelper &om_load_helper); | ||||
Status LoadModelData(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; | Status LoadModelData(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; | ||||
Status LoadWeights(OmFileLoadHelper &om_load_helper); | |||||
Status LoadWeights(OmFileLoadHelper &om_load_helper) const; | |||||
Status LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; | Status LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; | ||||
Status LoadTask(OmFileLoadHelper &om_load_helper); | |||||
Status LoadTask(OmFileLoadHelper &om_load_helper) const; | |||||
Status LoadTask(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; | Status LoadTask(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; | ||||
Status LoadTBEKernelStore(OmFileLoadHelper &om_load_helper); | |||||
Status LoadTBEKernelStore(OmFileLoadHelper &om_load_helper) const; | |||||
Status LoadTBEKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; | Status LoadTBEKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; | ||||
Status LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper); | |||||
Status LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper) const; | |||||
Status LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, | Status LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, | ||||
const size_t mode_index) const; | const size_t mode_index) const; | ||||
@@ -49,7 +49,12 @@ enum { | |||||
kAtomic, | kAtomic, | ||||
kKernelLaunchPrepare, | kKernelLaunchPrepare, | ||||
kRtKernelLaunch, | kRtKernelLaunch, | ||||
kRtEventCreateRecord, | |||||
kRtEventSync, | |||||
kRtEventDestroy, | |||||
kRtStreamSync, | |||||
kOpExecute, | kOpExecute, | ||||
kModelExecute, | |||||
kAllocMem, | kAllocMem, | ||||
kCopyH2D, | kCopyH2D, | ||||
kPrepareNode, | kPrepareNode, | ||||
@@ -88,7 +93,7 @@ class ProfilingContext { | |||||
* 因此编译时注册字符串的动作并没有生效。在执行时,动态的打开了profiling,这种场景下,执行时无法拿到注册后字符串 | * 因此编译时注册字符串的动作并没有生效。在执行时,动态的打开了profiling,这种场景下,执行时无法拿到注册后字符串 | ||||
*/ | */ | ||||
bool IsEnabled() const noexcept { | bool IsEnabled() const noexcept { | ||||
return enabled_ && profiler_ != nullptr; | |||||
return enabled_ && (profiler_ != nullptr); | |||||
} | } | ||||
void SetEnable() noexcept { | void SetEnable() noexcept { | ||||
enabled_ = true; | enabled_ = true; | ||||
@@ -231,7 +231,7 @@ constexpr int32_t OM_PROTO_VERSION = 2; | |||||
/// @return string | /// @return string | ||||
/// | /// | ||||
template <typename T> | template <typename T> | ||||
GE_FUNC_VISIBILITY std::string ToString(std::vector<T> &v) { | |||||
GE_FUNC_VISIBILITY std::string ToString(const std::vector<T> &v) { | |||||
std::stringstream ss; | std::stringstream ss; | ||||
ss << "["; | ss << "["; | ||||
for (const T x : v) { | for (const T x : v) { | ||||
@@ -27,7 +27,6 @@ | |||||
#include "framework/common/types.h" | #include "framework/common/types.h" | ||||
#include "graph/tensor.h" | #include "graph/tensor.h" | ||||
#include "graph/ge_tensor.h" | #include "graph/ge_tensor.h" | ||||
#include "runtime/base.h" | |||||
namespace ge { | namespace ge { | ||||
class SingleOp; | class SingleOp; | ||||
@@ -1 +1 @@ | |||||
Subproject commit ab3207e99f94aabf036e1c8b068de0df15ff2d01 | |||||
Subproject commit 569f685a2e6107daf613daf98d4ef8e29bde6e86 |
@@ -42,6 +42,10 @@ enum class AicpuErrMsgType { | |||||
ERR_MSG_TYPE_AICPU = 2, | ERR_MSG_TYPE_AICPU = 2, | ||||
}; | }; | ||||
enum class AicpuExtInfoMsgType { | |||||
EXT_MODEL_ID_MSG_TYPE = 0, | |||||
}; | |||||
typedef struct tagAicpuConfigMsg { | typedef struct tagAicpuConfigMsg { | ||||
uint8_t msgType; | uint8_t msgType; | ||||
uint8_t reserved1; | uint8_t reserved1; | ||||
@@ -52,6 +56,23 @@ typedef struct tagAicpuConfigMsg { | |||||
uint32_t reserved2; | uint32_t reserved2; | ||||
} AicpuConfigMsg; | } AicpuConfigMsg; | ||||
typedef struct tagAicpuModelIdInfo { | |||||
uint32_t modelId; | |||||
uint32_t extendModelId; | |||||
uint32_t extendInfo[13]; | |||||
} AicpuModelIdInfo; | |||||
// 64 bytes | |||||
typedef struct tagAicpuExtendInfo { | |||||
uint8_t msgType; | |||||
uint8_t version; | |||||
uint8_t reserved[2]; | |||||
union { | |||||
AicpuModelIdInfo modelIdMap; | |||||
}; | |||||
} AicpuExtendInfo; | |||||
typedef struct tagAicoreErrMsgInfo { | typedef struct tagAicoreErrMsgInfo { | ||||
uint8_t errType; | uint8_t errType; | ||||
uint8_t version; | uint8_t version; | ||||
@@ -501,6 +501,7 @@ REG_OP(Constant) | |||||
*@brief Creates a file constant tensor, The operator is used to process the very large weight which is store in file. \n | *@brief Creates a file constant tensor, The operator is used to process the very large weight which is store in file. \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*file_path: A string, used to record file path. \n | |||||
*file_id: A string, used to record file id. \n | *file_id: A string, used to record file id. \n | ||||
*shape: data shape. \n | *shape: data shape. \n | ||||
*dtype: data type. \n | *dtype: data type. \n | ||||
@@ -511,7 +512,8 @@ REG_OP(Constant) | |||||
REG_OP(FileConstant) | REG_OP(FileConstant) | ||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ | .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ | ||||
DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) | DT_UINT8, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) | ||||
.REQUIRED_ATTR(file_id, String) | |||||
.ATTR(file_path, String, "") | |||||
.ATTR(file_id, String, "") | |||||
.REQUIRED_ATTR(shape, ListInt) | .REQUIRED_ATTR(shape, ListInt) | ||||
.REQUIRED_ATTR(dtype, Type) | .REQUIRED_ATTR(dtype, Type) | ||||
.OP_END_FACTORY_REG(FileConstant) | .OP_END_FACTORY_REG(FileConstant) | ||||
@@ -1206,6 +1208,39 @@ REG_OP(Copy) | |||||
.OP_END_FACTORY_REG(Copy); | .OP_END_FACTORY_REG(Copy); | ||||
/** | /** | ||||
*@brief copy the src tensor to the dst tensor according the special parameter . \n | |||||
*@par Inputs: | |||||
*Eight inputs, including: | |||||
*dst: A tensor. Must be one of the following types: | |||||
* double, float32, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool | |||||
*dst_size: A tensor with type int32 | |||||
*dst_stride: A tensor with type int32 | |||||
*dst_storage_offset: A tensor with type int32 | |||||
*src: A tensor. Must be one of the following types: | |||||
* double, float32, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool | |||||
*src_size: A tensor with type int32 | |||||
*src_stride: A tensor with type int32 | |||||
*src_storage_offset: the storage_offset of src tensor . \n | |||||
*@par Outputs: | |||||
*dst: An ref tensor.Must be one of the following types: | |||||
* double, float32, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool . \n | |||||
*/ | |||||
REG_OP(ViewCopy) | |||||
.INPUT(dst, TensorType::BasicType()) | |||||
.INPUT(dst_size, TensorType::IndexNumberType()) | |||||
.INPUT(dst_stride, TensorType::IndexNumberType()) | |||||
.INPUT(dst_storage_offset, TensorType::IndexNumberType()) | |||||
.INPUT(src, TensorType::BasicType()) | |||||
.INPUT(src_size, TensorType::IndexNumberType()) | |||||
.INPUT(src_stride, TensorType::IndexNumberType()) | |||||
.INPUT(src_storage_offset, TensorType::IndexNumberType()) | |||||
.OUTPUT(dst, TensorType::BasicType()) | |||||
.OP_END_FACTORY_REG(ViewCopy) | |||||
/** | |||||
*@brief Generates fingerprint values. \n | *@brief Generates fingerprint values. \n | ||||
*@par Inputs: | *@par Inputs: | ||||
@@ -28,7 +28,7 @@ namespace ge { | |||||
*@par Inputs: | *@par Inputs: | ||||
* @li x: A tensor. Must be one of the following types:uint8, int8,int16, int32, | * @li x: A tensor. Must be one of the following types:uint8, int8,int16, int32, | ||||
int64, float16, float, double.The format must be NHWC NCHW NC1HWC0. | |||||
int64, float16, float, double.The format must be NHWC/NCHW. | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li ksize: Kernel size. Input type is int. | *@li ksize: Kernel size. Input type is int. | ||||
@@ -2478,5 +2478,24 @@ REG_OP(GetNextFromQueue) | |||||
.ATTR(output_types, ListType, {}) | .ATTR(output_types, ListType, {}) | ||||
.ATTR(output_shapes, ListListInt, {{}, {}}) | .ATTR(output_shapes, ListListInt, {{}, {}}) | ||||
.OP_END_FACTORY_REG(GetNextFromQueue) | .OP_END_FACTORY_REG(GetNextFromQueue) | ||||
/** | |||||
* @brief OptionalGetValue | |||||
* @par Inputs: | |||||
* optional: A tensor of type variant | |||||
* @par Outputs: | |||||
* components: A list of Tensor objects of output_types | |||||
* @par Attributes: | |||||
* output_types: types of all outputs | |||||
* output_shapes: shapes of all outputs | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(OptionalGetValue) | |||||
.INPUT(optional, TensorType({DT_VARIANT})) | |||||
.DYNAMIC_OUTPUT(components, TensorType::BasicType()) | |||||
.REQUIRED_ATTR(output_types, ListType) | |||||
.REQUIRED_ATTR(output_shapes, ListListInt) | |||||
.OP_END_FACTORY_REG(OptionalGetValue) | |||||
} // namespace ge | } // namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ |
@@ -105,6 +105,54 @@ REG_OP(ProdEnvMatA) | |||||
.ATTR(split_count, Int, 1) | .ATTR(split_count, Int, 1) | ||||
.ATTR(split_index, Int, 0) | .ATTR(split_index, Int, 0) | ||||
.OP_END_FACTORY_REG(ProdEnvMatA) | .OP_END_FACTORY_REG(ProdEnvMatA) | ||||
/** | |||||
* @brief Calculate ProdEnvMatACalcDescrpt. \n | |||||
* | |||||
* @par Inputs: | |||||
* @li distance: A Tensor. Must be one of the following types: float32, float64. | |||||
* @li rij_x: A Tensor. Must be one of the following types: float32, float64. | |||||
* @li rij_y: A Tensor. Must be one of the following types: float32, float64. | |||||
* @li rij_z: A Tensor. Must be one of the following types: float32, float64. | |||||
* @li type: A Tensor. Must be one of the following types: int32. | |||||
* @li natoms: A Tensor. Must be one of the following types: int32. | |||||
* @li mesh: A Tensor. Must be one of the following types: int32. | |||||
* @li davg: A Tensor. Must be one of the following types: float32, float64. | |||||
* @li dstd: A Tensor. Must be one of the following types: float32, float64. \n | |||||
* | |||||
* @par Outputs: | |||||
* @li descrpt: A Tensor. Must be one of the following types: float32, float64. | |||||
* @li descrpt_deriv: A Tensor. Must be one of the following types: float32, float64. \n | |||||
* | |||||
* @par Attributes: | |||||
* @li rcut_a: A Float. | |||||
* @li rcut_r: A Float. | |||||
* @li rcut_r_smth: A Float. | |||||
* @li sel_a: A ListInt. | |||||
* @li sel_r: A ListInt. \n | |||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(ProdEnvMatACalcDescrpt) | |||||
.INPUT(distance, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(rij_x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(rij_y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(rij_z, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(type, TensorType({DT_INT32})) | |||||
.INPUT(natoms, TensorType({DT_INT32})) | |||||
.INPUT(mesh, TensorType({DT_INT32})) | |||||
.INPUT(davg, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.INPUT(dstd, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(descrpt, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.OUTPUT(descrpt_deriv, TensorType({DT_FLOAT, DT_DOUBLE})) | |||||
.ATTR(rcut_a, Float, 1.0) | |||||
.ATTR(rcut_r, Float, 1.0) | |||||
.ATTR(rcut_r_smth, Float, 1.0) | |||||
.ATTR(sel_a, ListInt, {}) | |||||
.ATTR(sel_r, ListInt, {}) | |||||
.OP_END_FACTORY_REG(ProdEnvMatACalcDescrpt) | |||||
/** | /** | ||||
* @brief Calculate ProdForceSeA. \n | * @brief Calculate ProdForceSeA. \n | ||||
* | * | ||||
@@ -195,6 +243,9 @@ REG_OP(ProdVirialSeA) | |||||
* Two attributes, including: | * Two attributes, including: | ||||
* @li split_count: A Scalar. | * @li split_count: A Scalar. | ||||
* @li split_index: A Scalar. \n | * @li split_index: A Scalar. \n | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(TabulateFusionGrad) | REG_OP(TabulateFusionGrad) | ||||
.INPUT(table, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | .INPUT(table, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | ||||
@@ -286,7 +286,7 @@ REG_OP(Minimum) | |||||
*@par Inputs: | *@par Inputs: | ||||
*One inputs, include: | *One inputs, include: | ||||
*x:A Tensor of type float16, float32, int32, int64, double, | *x:A Tensor of type float16, float32, int32, int64, double, | ||||
* complex64, complex128.the format can be [NCHW,NC1HWC0,NHWC,ND,NCHW,NC1HWC0,NHWC,ND] | |||||
* complex64, complex128.the format can be [NCHW,NHWC,ND] | |||||
*@par Outputs: | *@par Outputs: | ||||
*y:A Tensor with same type as "x". \n | *y:A Tensor with same type as "x". \n | ||||
@@ -418,7 +418,7 @@ REG_OP(SquaredDifference) | |||||
*@par Inputs: | *@par Inputs: | ||||
*x: A Tensor of type float16, float32, double, complex64, complex128. | *x: A Tensor of type float16, float32, double, complex64, complex128. | ||||
* the format can be [NCHW,NC1HWC0,NHWC,ND] | |||||
* the format can be [NCHW,NHWC,ND] | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor of the same type as "x". \n | *y: A Tensor of the same type as "x". \n | ||||
@@ -439,7 +439,7 @@ REG_OP(Cos) | |||||
* Two inputs, including: | * Two inputs, including: | ||||
*@li x1: A Tensor. Must be one of the following types: | *@li x1: A Tensor. Must be one of the following types: | ||||
* float16, float32, int32, int8, uint8, float64, int64, uint16, int16, | * float16, float32, int32, int8, uint8, float64, int64, uint16, int16, | ||||
* complex64, complex128, the format can be [NCHW,NC1HWC0,NHWC,ND]. | |||||
* complex64, complex128, the format can be [NCHW,NHWC,ND]. | |||||
*@li x2: A Tensor. Has the same type and format as input "x1". \n | *@li x2: A Tensor. Has the same type and format as input "x1". \n | ||||
*@par Outputs: | *@par Outputs: | ||||
@@ -468,7 +468,7 @@ REG_OP(Div) | |||||
*@li x1: A Tensor. Must be one of the following types: | *@li x1: A Tensor. Must be one of the following types: | ||||
* float16, float32, int32, int8, uint8, double, int16, int64, complex64, | * float16, float32, int32, int8, uint8, double, int16, int64, complex64, | ||||
* complex128, quint8, qint8, qint32, string, bool. the format can be | * complex128, quint8, qint8, qint32, string, bool. the format can be | ||||
* [NCHW, NC1HWC0, NHWC, ND] | |||||
* [NCHW, NHWC, ND] | |||||
*@li x2: A Tensor of the same type and format as "x1". \n | *@li x2: A Tensor of the same type and format as "x1". \n | ||||
*@par Outputs: | *@par Outputs: | ||||
@@ -1177,6 +1177,31 @@ REG_OP(FusedMulAdd) | |||||
.OP_END_FACTORY_REG(FusedMulAdd) | .OP_END_FACTORY_REG(FusedMulAdd) | ||||
/** | /** | ||||
*@brief Confuse mul+add+add with broadcast. \n | |||||
*@par Inputs: | |||||
*Four inputs, including: | |||||
* @li x1: A Tensor. Must be one of the following types:int32, float16, float32. | |||||
* @li x2: A Tensor of the same type as "x1". | |||||
* @li x3: A Tensor of the same type as "x1". | |||||
* @li x4: A Tensor of the same type as "x1". \n | |||||
*@par Outputs: | |||||
* y: A Tensor. Has the same type as "x1". \n | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(FusedMulAddAdd) | |||||
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
.INPUT(x3, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
.INPUT(x4, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||||
.OP_END_FACTORY_REG(FusedMulAddAdd) | |||||
/** | |||||
*@brief Returns x1 + x2 element-wise. \n | *@brief Returns x1 + x2 element-wise. \n | ||||
* | * | ||||
@@ -1299,7 +1324,7 @@ REG_OP(AssignSub) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Two inputs, including: | * Two inputs, including: | ||||
*@li y: An NCHW, NC1HWC0, NHWC, ND Tensor. Must be one of the following types: \ | |||||
*@li y: An NCHW, NHWC, ND Tensor. Must be one of the following types: \ | |||||
* float, int32, int8, double, complex64, complex128, half. | * float, int32, int8, double, complex64, complex128, half. | ||||
*@li dy: A Tensor of the same type and format as "y". \n | *@li dy: A Tensor of the same type and format as "y". \n | ||||
@@ -1321,11 +1346,11 @@ REG_OP(RsqrtGrad) | |||||
*@brief Computes hyperbolic sine of "x" element-wise. \n | *@brief Computes hyperbolic sine of "x" element-wise. \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*x: An NCHW, NC1HWC0, NHWC,or ND Tensor of type float, double, complex64, | |||||
*x: An NCHW, NHWC,or ND Tensor of type float, double, complex64, | |||||
* complex128, half. \n | * complex128, half. \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A NCHW, NC1HWC0, NHWC,or ND Tensor of type float, double, complex64, | |||||
*y: A NCHW, NHWC,or ND Tensor of type float, double, complex64, | |||||
* complex128, half. \n | * complex128, half. \n | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
@@ -1365,7 +1390,7 @@ REG_OP(ClipByValue) | |||||
*@par Inputs: | *@par Inputs: | ||||
*x: A Tensor of type float16, float32, double, complex64, complex128. | *x: A Tensor of type float16, float32, double, complex64, complex128. | ||||
* the format can be [NCHW,NC1HWC0,NHWC,ND]. \n | |||||
* the format can be [NCHW,NHWC,ND]. \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type as "x". \n | *y: A Tensor. Has the same type as "x". \n | ||||
@@ -1385,7 +1410,7 @@ REG_OP(Cosh) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Two inputs, including: | * Two inputs, including: | ||||
*@li x1: A Tensor. Must be one of the following types:float16, float32, int32, | *@li x1: A Tensor. Must be one of the following types:float16, float32, int32, | ||||
* int8, uint8, double, the format can be [NCHW,NC1HWC0,NHWC,ND]. | |||||
* int8, uint8, double, the format can be [NCHW,NHWC,ND]. | |||||
*@li x2: A Tensor of the same type as "x1". \n | *@li x2: A Tensor of the same type as "x1". \n | ||||
*@par Outputs: | *@par Outputs: | ||||
@@ -1410,7 +1435,7 @@ REG_OP(DivNoNan) | |||||
* One input: \n | * One input: \n | ||||
*x: A Tensor, Must be one of the following types: | *x: A Tensor, Must be one of the following types: | ||||
* int32, uint8, int16, int8, int64, int64, uint16, uint32, uint64, | * int32, uint8, int16, int8, int64, int64, uint16, uint32, uint64, | ||||
* and format can be [NCHW,NC1HWC0,NHWC,ND] | |||||
* and format can be [NCHW,NHWC,ND] | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type and format as "x" | *y: A Tensor. Has the same type and format as "x" | ||||
@@ -1978,7 +2003,7 @@ REG_OP(BitwiseOr) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Two inputs, including: | *Two inputs, including: | ||||
*@li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, uint16, uint32, uint64. | *@li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, uint16, uint32, uint64. | ||||
* The format is NC1HWC0 or ND. Broadcasting is supported. | |||||
* The format is ND. Broadcasting is supported. | |||||
*@li x2: A Tensor. Has the same type and format as "x1". \n | *@li x2: A Tensor. Has the same type and format as "x1". \n | ||||
*@par Outputs: | *@par Outputs: | ||||
@@ -3468,7 +3493,7 @@ REG_OP(AxpyV2) | |||||
.OP_END_FACTORY_REG(AxpyV2) | .OP_END_FACTORY_REG(AxpyV2) | ||||
/** | /** | ||||
* @brief Add the partial values of two tensors in format NC1HWC0. | |||||
* @brief Add the partial values of two tensors. | |||||
* @par Inputs: | * @par Inputs: | ||||
* @li x1: A Tensor in 5HD, and must be one of the following types: float16, | * @li x1: A Tensor in 5HD, and must be one of the following types: float16, | ||||
@@ -1267,7 +1267,7 @@ REG_OP(DecodeAndCropJpeg) | |||||
*@par Inputs: | *@par Inputs: | ||||
* One input: | * One input: | ||||
*x: An NC1HWC0 Tensor. | |||||
*x: A Tensor. | |||||
* Must be one of the following types: float16, float32 . \n | * Must be one of the following types: float16, float32 . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -1304,7 +1304,7 @@ REG_OP(ResizeBilinearV2D) | |||||
*@par Inputs: | *@par Inputs: | ||||
* One input: | * One input: | ||||
*images: An NC1HWC0 Tensor. | |||||
*images: A Tensor. | |||||
* Must be one of the following types: float16, float32 . \n | * Must be one of the following types: float16, float32 . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -1338,7 +1338,7 @@ REG_OP(KeepRatioResizeBilinear) | |||||
*@par Inputs: | *@par Inputs: | ||||
* One input: | * One input: | ||||
*x: An NC1HWC0 Tensor. | |||||
*x: A Tensor. | |||||
* Must be one of the following types: float16, float32, int32, int8, uint8 | * Must be one of the following types: float16, float32, int32, int8, uint8 | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -2310,6 +2310,32 @@ REG_OP(UpsampleNearest1dGrad) | |||||
.OP_END_FACTORY_REG(UpsampleNearest1dGrad) | .OP_END_FACTORY_REG(UpsampleNearest1dGrad) | ||||
/** | /** | ||||
* @brief Function parse image from string to int. \n | |||||
* @par Inputs: | |||||
* contents: A Tensor of type string. 0-D. The JPEG, GIF, PNG, BMP-encoded image. \n | |||||
* @par Attributes: | |||||
* @li channels: An optional int. Defaults to 0. Number of color channels for the decoded image. | |||||
* @li dtype: type of image | |||||
* @li expand_animations: Controls the shape of the returned op's output. If 'true', the returned op will | |||||
produce a 4-D tensor for GIF files. If 'false', the returned op will produce a 3-D tensor for GIF files. | |||||
* @par Outputs: | |||||
* image: A Tensor dtype of uint8, uint16 or float. | |||||
* @par Restrictions: | |||||
* Warning:THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(DecodeImage) | |||||
.INPUT(contents, TensorType({DT_STRING})) | |||||
.OUTPUT(image, TensorType({DT_UINT8, DT_UINT16, DT_FLOAT})) | |||||
.ATTR(channels, Int, 0) | |||||
.ATTR(dtype, Type, DT_UINT8) | |||||
.ATTR(expand_animations, Bool, true) | |||||
.OP_END_FACTORY_REG(DecodeImage) | |||||
/** | |||||
* @brief JPEG encode input image with provided compression quality. \n | * @brief JPEG encode input image with provided compression quality. \n | ||||
* @par Inputs: | * @par Inputs: | ||||
@@ -425,7 +425,7 @@ REG_OP(EndOfSequence) | |||||
*@par Inputs: | *@par Inputs: | ||||
*x: A Tensor of type float16, float32 or double. the format can be | *x: A Tensor of type float16, float32 or double. the format can be | ||||
* [NCHW,NC1HWC0,NHWC,ND] | |||||
* [NCHW,NHWC,ND] | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type and format as "x" . \n | *y: A Tensor. Has the same type and format as "x" . \n | ||||
@@ -462,15 +462,15 @@ REG_OP(Erfc) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Three inputs, including: | *Three inputs, including: | ||||
*@li x: A Tensor of type float32, float16, int32, int64. | |||||
*@li range: A Tensor of type float32,float16,int32, int64. | |||||
*@li x: A Tensor of type float32, int32, int64. float16 is currently not supported. | |||||
*@li range: A Tensor of type float32, int32, int64. float16 is currently not supported. | |||||
*@li nbins: A Tensor of type int32 . \n | *@li nbins: A Tensor of type int32 . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
* dtype: An optional attribute. Defaults to "int32" . \n | * dtype: An optional attribute. Defaults to "int32" . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. A Tensor of type int32 or int64 . \n | |||||
*y: A Tensor. A Tensor of type int32. \n | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with TensorFlow operator HistogramFixedWidth. | * Compatible with TensorFlow operator HistogramFixedWidth. | ||||
@@ -31,9 +31,9 @@ namespace ge { | |||||
*@par Inputs: | *@par Inputs: | ||||
*Three inputs, including: | *Three inputs, including: | ||||
* @li x1: A matrix Tensor. 2D. Must be one of the following types: float16, | * @li x1: A matrix Tensor. 2D. Must be one of the following types: float16, | ||||
* float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. | |||||
* float32, int32. Has format [ND, NHWC]. | |||||
* @li x2: A matrix Tensor. 2D. Must be one of the following types: float16, | * @li x2: A matrix Tensor. 2D. Must be one of the following types: float16, | ||||
* float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. | |||||
* float32, int32. Has format [ND, NHWC]. | |||||
* @li bias: A optional 1D Tensor. Must be one of the following types: float16, | * @li bias: A optional 1D Tensor. Must be one of the following types: float16, | ||||
* float32, int32. Has format [ND, NHWC] . \n | * float32, int32. Has format [ND, NHWC] . \n | ||||
@@ -43,7 +43,7 @@ namespace ge { | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: The result matrix Tensor. 2D. Must be one of the following types: float16, | *y: The result matrix Tensor. 2D. Must be one of the following types: float16, | ||||
* float32, int32. Has format [ND, NHWC, FRACTAL_NZ] . \n | |||||
* float32, int32. Has format [ND, NHWC] . \n | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator BatchMatmul. | * Compatible with the TensorFlow operator BatchMatmul. | ||||
@@ -63,9 +63,9 @@ REG_OP(MatMul) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Four inputs, including: | *Four inputs, including: | ||||
* @li x1: A matrix Tensor. 2D. Must be one of the following types: float32, | * @li x1: A matrix Tensor. 2D. Must be one of the following types: float32, | ||||
float16, int32, int8. Has format [ND, NHWC, FRACTAL_NZ]. | |||||
float16, int32, int8. Has format [ND, NHWC]. | |||||
* @li x2: A matrix Tensor. 2D. Must be one of the following types: float32, | * @li x2: A matrix Tensor. 2D. Must be one of the following types: float32, | ||||
float16, int32, int8. Has format [ND, NHWC, FRACTAL_NZ]. | |||||
float16, int32, int8. Has format [ND, NHWC]. | |||||
* @li bias: A 1D Tensor. Must be one of the following types: float32, | * @li bias: A 1D Tensor. Must be one of the following types: float32, | ||||
float16, int32. Has format [ND, NHWC]. | float16, int32. Has format [ND, NHWC]. | ||||
* @li offset_w: A Optional 1D Tensor for quantized inference. Type is int8. | * @li offset_w: A Optional 1D Tensor for quantized inference. Type is int8. | ||||
@@ -82,7 +82,7 @@ REG_OP(MatMul) | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: The result matrix Tensor. 2D. Must be one of the following types: float32, | *y: The result matrix Tensor. 2D. Must be one of the following types: float32, | ||||
float16, int32. Has format [ND, NHWC, FRACTAL_NZ]. \n | |||||
float16, int32. Has format [ND, NHWC]. \n | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator BatchMatmul. | * Compatible with the TensorFlow operator BatchMatmul. | ||||
@@ -147,24 +147,24 @@ REG_OP(MatMulV2Compress) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Five inputs, including: | *Five inputs, including: | ||||
*@li a: A matrix Tensor. Must be one of the following types: float16, int8. | *@li a: A matrix Tensor. Must be one of the following types: float16, int8. | ||||
* Has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ). | |||||
* Has format [ND]. | |||||
*@li b: A matrix Tensor. Must be one of the following types: float16, int8. | *@li b: A matrix Tensor. Must be one of the following types: float16, int8. | ||||
* Has format [ND, FRACTAL_NZ, FRACTAL_Z]. 2D(ND) or 4D(FRACTAL_NZ, FRACTAL_Z). | |||||
* Has format ND. | |||||
*@li c: A matrix Tensor. Must be one of the following types: float16, int32, | *@li c: A matrix Tensor. Must be one of the following types: float16, int32, | ||||
* float32. has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ). | |||||
* float32. has format ND. | |||||
*@li alpha: A 1D Tensor. The shape of alpha is [1]. Must be one of the following | *@li alpha: A 1D Tensor. The shape of alpha is [1]. Must be one of the following | ||||
* types: float16, int32, float32. Has format [ND]. | * types: float16, int32, float32. Has format [ND]. | ||||
*@li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following | *@li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following | ||||
* types: float16, int32, float32. Has format [ND]. | * types: float16, int32, float32. Has format [ND]. | ||||
* The format of a, b, c has restriction:\n | * The format of a, b, c has restriction:\n | ||||
* When type of a is int8 and type of c is int32, the format of a, b, c should | * When type of a is int8 and type of c is int32, the format of a, b, c should | ||||
* all be ND, or a is FRACTAL_NZ and b is FRACTAL_Z and c is ND.\n | |||||
* all be ND.\n | |||||
* When type of a is int8 and type of c is float32, the format of a, b, c should | * When type of a is int8 and type of c is float32, the format of a, b, c should | ||||
* all be ND or a is FRACTAL_NZ and b is FRACTAL_Z and c is FRACTAL_NZ.\n | |||||
* all be ND.\n | |||||
* When type of a is float16 and type of c is float16, the format of a, b, c | * When type of a is float16 and type of c is float16, the format of a, b, c | ||||
* should all be ND or FRACTAL_NZ.\n | |||||
* should all be ND.\n | |||||
* When type of a is float16 and type of c is float32, the format of a, b, c | * When type of a is float16 and type of c is float32, the format of a, b, c | ||||
* should all be ND or FRACTAL_NZ . \n | |||||
* should all be ND. \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*Two attributes, including: | *Two attributes, including: | ||||
@@ -175,8 +175,7 @@ REG_OP(MatMulV2Compress) | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: The result matrix Tensor. Must be one of the following types: float16, | *y: The result matrix Tensor. Must be one of the following types: float16, | ||||
* float32, int32. Has format [ND, FRACTAL_NZ], the format should be equal to a. | |||||
* 2D(ND) or 4D(FRACTAL_NZ). | |||||
* float32, int32. Has format [ND], the format should be equal to a. | |||||
*/ | */ | ||||
REG_OP(GEMM) | REG_OP(GEMM) | ||||
@@ -196,9 +195,9 @@ REG_OP(GEMM) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Two inputs, including: | *Two inputs, including: | ||||
* @li x1: A matrix Tensor. Must be one of the following types: float16, | * @li x1: A matrix Tensor. Must be one of the following types: float16, | ||||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. | |||||
* float32, int32. 2D or higher. Has format [ND, NHWC]. | |||||
* @li x2: A matrix Tensor. Must be one of the following types: float16, | * @li x2: A matrix Tensor. Must be one of the following types: float16, | ||||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | |||||
* float32, int32. 2D or higher. Has format [ND, NHWC] . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. | *@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. | ||||
@@ -206,7 +205,7 @@ REG_OP(GEMM) | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, | *y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, | ||||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2" . \n | |||||
* float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape length as "x1" and "x2" . \n | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator BatchMatmul. | * Compatible with the TensorFlow operator BatchMatmul. | ||||
@@ -227,11 +226,11 @@ REG_OP(BatchMatMul) | |||||
* @par Inputs: | * @par Inputs: | ||||
* Three inputs, including: | * Three inputs, including: | ||||
* @li x1: A matrix Tensor. Must be one of the following types: float16, | * @li x1: A matrix Tensor. Must be one of the following types: float16, | ||||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. | |||||
* float32, int32. 2D or higher. Has format [ND, NHWC]. | |||||
* @li x2: A matrix Tensor. Must be one of the following types: float16, | * @li x2: A matrix Tensor. Must be one of the following types: float16, | ||||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | |||||
* float32, int32. 2D or higher. Has format [ND, NHWC] . \n | |||||
* @li bias: A matrix Tensor. Must be one of the following types: float16, | * @li bias: A matrix Tensor. Must be one of the following types: float16, | ||||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ] . \n | |||||
* float32, int32. 2D or higher. Has format [ND, NHWC] . \n | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. | * @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. | ||||
@@ -239,7 +238,7 @@ REG_OP(BatchMatMul) | |||||
* @par Outputs: | * @par Outputs: | ||||
* y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, | * y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, | ||||
* float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2" . \n | |||||
* float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape length as "x1" and "x2" . \n | |||||
* @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator BatchMatmul. | * Compatible with the TensorFlow operator BatchMatmul. | ||||
@@ -86,35 +86,37 @@ REG_OP(L2NormalizeGrad) | |||||
*@brief Performs batch normalization . \n | *@brief Performs batch normalization . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) | |||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||||
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
if input "x" is with format NC1HWC0. Specifies the scaling factor. | |||||
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
if input "x" is with format NC1HWC0. Specifies the offset. | |||||
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the | |||||
* Five inputs, including: (NHWC, NCHW) | |||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW. | |||||
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
Specifies the scaling factor. | |||||
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Specifies the offset. | |||||
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
Specifies the mean used for inference. Must be "None" if the | |||||
operation is used for training. | operation is used for training. | ||||
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be | |||||
5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None" | |||||
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
Specifies the variance used for inference. Must be "None" | |||||
if the operation is used for training . \n | if the operation is used for training . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". | |||||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. | |||||
Defaults to "0.0001". | |||||
*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". | *@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". | ||||
*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n | |||||
*@li is_training: An optional bool, specifying if the operation is used for training or inference. | |||||
Defaults to "True" . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported) | |||||
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||||
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
if input "x" is with format NC1HWC0. Specifies the mean of "x". | |||||
* Five outputs, including: (NHWC, NCHW) | |||||
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW. | |||||
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
Specifies the mean of "x". | |||||
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | *@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | ||||
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". | |||||
Specifies the variance of "x". | |||||
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | *@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | ||||
Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | |||||
Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | |||||
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | *@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | ||||
*@li reserve_space_3: An optional Tensor of type float32. For compatibility with tensorflow, only has one useless element. \n |||||
*@li reserve_space_3: An optional Tensor of type float32. For compatibility with tensorflow, |||||
only has one useless element. \n |||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | *@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | ||||
@@ -264,17 +266,17 @@ REG_OP(SyncBatchNormBackwardElemt) | |||||
*@brief Performs batch normalization . \n | *@brief Performs batch normalization . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) | |||||
*@li x: A 3D or 6D Tensor of type float16 or float32, with format NDHWC or NCDHW for 4D or NDC1HWC0 for 6D. | |||||
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||||
if input "x" is with format NDC1HWC0. Specifies the scaling factor. | |||||
*@li offset: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||||
if input "x" is with format NC1HWC0. Specifies the offset. | |||||
*@li mean: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||||
if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the | |||||
* Five inputs, including: (NDHWC, NCDHW) |||||
*@li x: A 3D or 6D Tensor of type float16 or float32, with format NDHWC or NCDHW. | |||||
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | |||||
Specifies the scaling factor. | |||||
*@li offset: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. | |||||
Specifies the offset. | |||||
*@li mean: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. | |||||
Specifies the mean used for inference. Must be "None" if the | |||||
operation is used for training. | operation is used for training. | ||||
*@li variance: A Tensor of type float32. Must be 3D if input "x" is with format NHWC or NCHW. Must be | |||||
5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None" | |||||
*@li variance: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. |||||
Specifies the variance used for inference. Must be "None" | |||||
if the operation is used for training . \n | if the operation is used for training . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -283,16 +285,16 @@ if the operation is used for training . \n | |||||
*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n | *@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported) | |||||
*@li y: A 3D or 6D Tensor of type float16 or float32 for the normalized "x", with format NDHWC or NCDHW for 4D or NDC1HWC0 for 6D. | |||||
*@li batch_mean: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||||
if input "x" is with format NDC1HWC0. Specifies the mean of "x". | |||||
* Five outputs, including: (NDHWC, NCDHW) |||||
*@li y: A 3D or 6D Tensor of type float16 or float32 for the normalized "x", with format NDHWC or NCDHW. | |||||
*@li batch_mean: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. | |||||
Specifies the mean of "x". | |||||
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | *@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | ||||
Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x". | |||||
Specifies the variance of "x". | |||||
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | *@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | ||||
Must be 6D if input "x" is with format NDC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | |||||
Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | |||||
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | *@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | ||||
Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n | |||||
Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | *@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | ||||
@@ -375,11 +377,11 @@ REG_OP(BatchNormExt2) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Five inputs, including: | * Five inputs, including: | ||||
*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the gradient. | |||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0. | |||||
*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. | |||||
*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm. | |||||
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . | |||||
*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, for the gradient. | |||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW. | |||||
*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW. | |||||
*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW. It is an output of BatchNorm. | |||||
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW. It is an output of BatchNorm . | |||||
*@li reserve_space_3: A 1D optional Tensor of type float32. It is an output of BatchNorm . \n | *@li reserve_space_3: A 1D optional Tensor of type float32. It is an output of BatchNorm . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -388,11 +390,11 @@ REG_OP(BatchNormExt2) | |||||
*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n | *@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "x". | |||||
*@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "scale". | |||||
*@li *offset_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "offset". | |||||
*@li *reserve_space_4: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output. | |||||
*@li *reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output . \n | |||||
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, for the offset of "x". | |||||
*@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, for the offset of "scale". | |||||
*@li *offset_backprop: A Tensor of type float32, with format NHWC, NCHW, for the offset of "offset". | |||||
*@li *reserve_space_4: A Tensor of type float32, with shape NHWC, NCHW. Pass "None" to skip this output. | |||||
*@li *reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW. Pass "None" to skip this output . \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
* The preceding layer of this operator must be operator BatchNorm . \n | * The preceding layer of this operator must be operator BatchNorm . \n | ||||
@@ -423,11 +425,11 @@ REG_OP(BatchNormGrad) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Five inputs, including: | * Five inputs, including: | ||||
*@li y_backprop: A 3D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the gradient. | |||||
*@li x: A 3D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0. | |||||
*@li scale: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. | |||||
*@li reserve_space_1: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NC1HWC0. It is an output of BatchNorm. | |||||
*@li reserve_space_2: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NC1HWC0. It is an output of BatchNorm . \n | |||||
*@li y_backprop: A 3D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, for the gradient. | |||||
*@li x: A 3D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW. | |||||
*@li scale: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW. | |||||
*@li reserve_space_1: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW. It is an output of BatchNorm. | |||||
*@li reserve_space_2: A 3D or 6D Tensor of type float32, with format NDHWC, NCDHW. It is an output of BatchNorm . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". | *@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". | ||||
@@ -435,11 +437,11 @@ REG_OP(BatchNormGrad) | |||||
*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n | *@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "x". | |||||
*@li scale_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "scale". | |||||
*@li *offset_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "offset". | |||||
*@li *reserve_space_4: A Tensor of type float32, with shape NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output. | |||||
*@li *reserve_space_5: A Tensor of type float32, with shape NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output . \n | |||||
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, for the offset of "x". | |||||
*@li scale_backprop: A Tensor of type float32, with format NDHWC, NCDHW, for the offset of "scale". | |||||
*@li *offset_backprop: A Tensor of type float32, with format NDHWC, NCDHW, for the offset of "offset". | |||||
*@li *reserve_space_4: A Tensor of type float32, with shape NDHWC, NCDHW. Pass "None" to skip this output. | |||||
*@li *reserve_space_5: A Tensor of type float32, with shape NDHWC, NCDHW. Pass "None" to skip this output . \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
* The preceding layer of this operator must be operator BatchNorm . \n | * The preceding layer of this operator must be operator BatchNorm . \n | ||||
@@ -515,7 +517,7 @@ REG_OP(BatchNormGradExt2) | |||||
*@brief Performs batch normalization . \n | *@brief Performs batch normalization . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW. | |||||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | ||||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | *@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | ||||
*@li momentum: A Tensor, represents the mean and the variance's scale factor | *@li momentum: A Tensor, represents the mean and the variance's scale factor | ||||
@@ -545,7 +547,7 @@ REG_OP(BNInference) | |||||
*@brief Performs batch normalization . \n | *@brief Performs batch normalization . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW. | |||||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | ||||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | *@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | ||||
*@li scale: An optional tensor of type float16 or float32, no use | *@li scale: An optional tensor of type float16 or float32, no use | ||||
@@ -268,7 +268,7 @@ REG_OP(ROIAlign) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Two inputs, including: | * Two inputs, including: | ||||
*@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16. | |||||
*@li x: An NCHW feature map of type float32 or float16. |||||
*@li img: source image. Has the same type and format as "x" . \n | *@li img: source image. Has the same type and format as "x" . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -316,12 +316,12 @@ REG_OP(PriorBox) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Six inputs, including: | * Six inputs, including: | ||||
*@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16. | |||||
*@li x: An NCHW feature map of type float32 or float16. |||||
*@li img: source image. Has the same type and format as "x". | *@li img: source image. Has the same type and format as "x". | ||||
*@li data_h: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map height. | |||||
*@li data_w: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map width. | |||||
*@li box_height: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the height of each prior box. | |||||
*@li box_width: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the width of each prior box . \n | |||||
*@li data_h: An NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map height. | |||||
*@li data_w: An NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map width. | |||||
*@li box_height: An NCHW tensor of type float32 or float16, specifying the height of each prior box. | |||||
*@li box_width: An NCHW tensor of type float32 or float16, specifying the width of each prior box . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li min_size: A required float32, specifying the minimum edge length of a square prior box. | *@li min_size: A required float32, specifying the minimum edge length of a square prior box. | ||||
@@ -371,7 +371,7 @@ REG_OP(PriorBoxD) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Six inputs, including: | * Six inputs, including: | ||||
*@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16. | |||||
*@li x: An NCHW feature map of type float32 or float16. |||||
*@li img: source image. Has the same type and format as "x". | *@li img: source image. Has the same type and format as "x". | ||||
*@li boxes: An ND tensor of type float32 or float16, specifying the prior box information. Same as output y | *@li boxes: An ND tensor of type float32 or float16, specifying the prior box information. Same as output y | ||||
@@ -420,7 +420,7 @@ REG_OP(PriorBoxDV2) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Two inputs, including: | * Two inputs, including: | ||||
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature | |||||
*@li x: A tensor of type float16 or float32, describing the feature | |||||
* map, dimension C1 must be equal to | * map, dimension C1 must be equal to | ||||
* (int(output_dim+15)/C0))*group_size*group_size. | * (int(output_dim+15)/C0))*group_size*group_size. | ||||
*@li rois: A tensor of type float16 or float32, with shape | *@li rois: A tensor of type float16 or float32, with shape | ||||
@@ -438,7 +438,7 @@ REG_OP(PriorBoxDV2) | |||||
* coordinates to the ROI coordinates . \n | * coordinates to the ROI coordinates . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: An NC1HWC0 tensor of type float16 or float32, describing the result | |||||
*y: A tensor of type float16 or float32, describing the result | |||||
* feature map . \n | * feature map . \n | ||||
*@attention Constraints: | *@attention Constraints: | ||||
@@ -1171,7 +1171,7 @@ REG_OP(SPP) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Three inputs, including: | * Three inputs, including: | ||||
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature | |||||
*@li x: A tensor of type float16 or float32, describing the feature | |||||
* map. The data of x must be greater than or equal to "0.0". | * map. The data of x must be greater than or equal to "0.0". | ||||
*@li rois: A tensor of type float16 or float32, with 3D shape | *@li rois: A tensor of type float16 or float32, with 3D shape | ||||
* [batch, 5, roi_max_num], describing the RIOs. Each ROI consists of five | * [batch, 5, roi_max_num], describing the RIOs. Each ROI consists of five | ||||
@@ -1195,7 +1195,7 @@ REG_OP(SPP) | |||||
* coordinates of width to the ROI coordinates . \n | * coordinates of width to the ROI coordinates . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: An NC1HWC0 tensor of type float16 or float32, describing the result | |||||
*y: A tensor of type float16 or float32, describing the result | |||||
* feature map . \n | * feature map . \n | ||||
*@attention Constraints: | *@attention Constraints: | ||||
@@ -1860,7 +1860,7 @@ REG_OP(RoiExtractor) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Two inputs, including: | * Two inputs, including: | ||||
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature | |||||
*@li x: A tensor of type float16 or float32, describing the feature | |||||
* map, dimension C1 must be equal to | * map, dimension C1 must be equal to | ||||
* (int(output_dim+15)/C0))*group_size*group_size. | * (int(output_dim+15)/C0))*group_size*group_size. | ||||
*@li rois: A tensor of type float16 or float32, with shape | *@li rois: A tensor of type float16 or float32, with shape | ||||
@@ -1878,7 +1878,7 @@ REG_OP(RoiExtractor) | |||||
* coordinates to the ROI coordinates . \n | * coordinates to the ROI coordinates . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: An NC1HWC0 tensor of type float16 or float32, describing the result | |||||
*y: A tensor of type float16 or float32, describing the result | |||||
* feature map . \n | * feature map . \n | ||||
*@attention Constraints: | *@attention Constraints: | ||||
@@ -1898,7 +1898,7 @@ REG_OP(PSROIPoolingV2) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Two inputs, including: | * Two inputs, including: | ||||
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the result | |||||
*@li x: A tensor of type float16 or float32, describing the result | |||||
* feature map . \n | * feature map . \n | ||||
*@li rois: A tensor of type float16 or float32, with shape | *@li rois: A tensor of type float16 or float32, with shape | ||||
* [batch, 5, rois_num], describing the ROIs, each ROI consists of five | * [batch, 5, rois_num], describing the ROIs, each ROI consists of five | ||||
@@ -1916,7 +1916,7 @@ REG_OP(PSROIPoolingV2) | |||||
*@li input_size: A required listInt, mapping the gradinput size: (H, W) | *@li input_size: A required listInt, mapping the gradinput size: (H, W) | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: An NC1HWC0 tensor of type float16 or float32, describing the feature | |||||
*y: A tensor of type float16 or float32, describing the feature | |||||
* map, dimension C1 must be equal to | * map, dimension C1 must be equal to | ||||
* (int(output_dim+15)/C0))*group_size*group_size. | * (int(output_dim+15)/C0))*group_size*group_size. | ||||
@@ -104,9 +104,8 @@ REG_OP(SoftmaxCrossEntropyWithLogits) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Two inputs, including: | * Two inputs, including: | ||||
* @li softmax: Output of the softmax operator. Must be one of the following | * @li softmax: Output of the softmax operator. Must be one of the following | ||||
* types: float16, float31, int32, int8, uint8. The format is NC1HWC0 or DN. | |||||
* @li grad_softmax: A Tensor. Has the same shape and type as "softmax". | |||||
* The format is NC1HWC0 or DN . \n | |||||
* types: float16, float32, int32, int8, uint8. | |||||
* @li grad_softmax: A Tensor. Has the same shape and type as "softmax".\n | |||||
*@par Attributes: | *@par Attributes: | ||||
* axes: An optional list of ints. Defaults to "{-1}" . \n | * axes: An optional list of ints. Defaults to "{-1}" . \n | ||||
@@ -1101,8 +1100,8 @@ REG_OP(GroupNorm) | |||||
*@brief Performs instance normalization . \n | *@brief Performs instance normalization . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
* Five inputs, including: (NC1HWC0, supported) | |||||
*@li x: A 5D Tensor of type float16 or float32, NC1HWC0. | |||||
* Five inputs, including: | |||||
*@li x: A 5D Tensor of type float16 or float32. | |||||
*@li gamma: A Tensor of type float32. | *@li gamma: A Tensor of type float32. | ||||
A 5D Tensor for scaling factor, to scale the normalized x. | A 5D Tensor for scaling factor, to scale the normalized x. | ||||
*@li beta: A Tensor of type float32. | *@li beta: A Tensor of type float32. | ||||
@@ -1121,7 +1120,7 @@ the value used for the running_mean and running_var computation. Default: "0.1". | |||||
variance to avoid dividing by zero. Defaults to "0.00001" . \n | variance to avoid dividing by zero. Defaults to "0.00001" . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
* Three outputs, including: (NHWC, NCHW NC1HWC0 supported) | |||||
* Three outputs, including: (NHWC, NCHW supported) | |||||
*@li y: A 5D tensor of type float16 or float32 for the normalized "x", | *@li y: A 5D tensor of type float16 or float32 for the normalized "x", | ||||
*@li batch_mean: A Tensor of type float32. | *@li batch_mean: A Tensor of type float32. | ||||
Specifies the mean of "x". | Specifies the mean of "x". | ||||
@@ -1154,7 +1153,7 @@ REG_OP(InstanceNormV2) | |||||
*@brief Performs instance normalization for inference. | *@brief Performs instance normalization for inference. | ||||
*@par Inputs:\n | *@par Inputs:\n | ||||
* Five inputs, including: (NC1HWC0 supported) | |||||
* Five inputs, including: | |||||
*@li x: A Tensor of type float16 or float32. | *@li x: A Tensor of type float16 or float32. | ||||
*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma. | *@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma. | ||||
*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta. | *@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta. | ||||
@@ -54,17 +54,17 @@ REG_OP(InTopKV2) | |||||
*@brief Performs batch normalization . \n | *@brief Performs batch normalization . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) | |||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||||
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
if input "x" is with format NC1HWC0. Specifies the scaling factor. | |||||
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
if input "x" is with format NC1HWC0. Specifies the offset. | |||||
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the | |||||
* Five inputs, including: (NHWC, NCHW supported) | |||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D. | |||||
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
Specifies the scaling factor. | |||||
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
Specifies the offset. | |||||
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
Specifies the mean used for inference. Must be "None" if the | |||||
operation is used for training. | operation is used for training. | ||||
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be | |||||
5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None" | |||||
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
Specifies the variance used for inference. Must be "None" | |||||
if the operation is used for training . \n | if the operation is used for training . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -73,16 +73,16 @@ if the operation is used for training . \n | |||||
*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n | *@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported) | |||||
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||||
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
if input "x" is with format NC1HWC0. Specifies the mean of "x". | |||||
* Five outputs, including: (NHWC, NCHW supported) | |||||
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D. | |||||
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
Specifies the mean of "x". | |||||
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | *@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | ||||
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". | |||||
Specifies the variance of "x". | |||||
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | *@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | ||||
Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | |||||
Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | |||||
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | *@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | ||||
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n | |||||
Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | *@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | ||||
@@ -109,18 +109,19 @@ REG_OP(FusedBatchNormV2) | |||||
* @brief Large amount of data sort.First operator of TopK. | * @brief Large amount of data sort.First operator of TopK. | ||||
* @par Inputs: | * @par Inputs: | ||||
* two input, including: | * two input, including: | ||||
* @li input_data: A Tensor. Data to be sorted. Support float16 | |||||
* @li input_index: A Tensor. Range(0, 2048). Datatype and format is same as input_data. | |||||
* @li input_data: A Tensor. Data to be sorted. Support float16 or float32. | |||||
* @li input_index: A Tensor. Range(0, 2048). Support float16 or int32. | |||||
* @par Attributes: | * @par Attributes: | ||||
* k_num: Int.Number to be sorted. | * k_num: Int.Number to be sorted. | ||||
* @par Outputs: | * @par Outputs: | ||||
* One output, including: | * One output, including: | ||||
* output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel. | * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel. | ||||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(SegmentSort) | REG_OP(SegmentSort) | ||||
.INPUT(input_data, TensorType({DT_FLOAT16})) | |||||
.INPUT(input_index, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(output_proposal, TensorType({DT_FLOAT16})) | |||||
.INPUT(input_data, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.INPUT(input_index, TensorType({DT_FLOAT16,DT_INT32})) | |||||
.OUTPUT(output_proposal, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.REQUIRED_ATTR(k_num, Int) | .REQUIRED_ATTR(k_num, Int) | ||||
.OP_END_FACTORY_REG(SegmentSort) | .OP_END_FACTORY_REG(SegmentSort) | ||||
@@ -128,36 +129,174 @@ REG_OP(SegmentSort) | |||||
* @brief: Large amount of data sort.Second operator of TopK. | * @brief: Large amount of data sort.Second operator of TopK. | ||||
* @par Inputs: | * @par Inputs: | ||||
* One input, including: | * One input, including: | ||||
* input_proposal: A Tensor. Proposal sorted for each channel. Support float16 | |||||
* input_proposal: A Tensor. Proposal sorted for each channel. Support float16 or float32 | |||||
* @par Attributes: | * @par Attributes: | ||||
* k_num: Int.Number to be sorted. | * k_num: Int.Number to be sorted. | ||||
* include_index: Bool. If include_index is false, output proposal. If include_index is true, output data and index. | |||||
* @par Outputs: | * @par Outputs: | ||||
* One output, including: | |||||
* Two output, including: | |||||
* output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel. | * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel. | ||||
* output_index: A Tensor. If include_index is true, output index. | |||||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(MultiMerge) | REG_OP(MultiMerge) | ||||
.INPUT(input_proposal, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(output_proposal, TensorType({DT_FLOAT16})) | |||||
.INPUT(input_proposal, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(output_proposal, TensorType({DT_FLOAT16,DT_FLOAT})) | |||||
.OUTPUT(output_index, TensorType({DT_INT32})) | |||||
.REQUIRED_ATTR(k_num, Int) | .REQUIRED_ATTR(k_num, Int) | ||||
.ATTR(include_index, Bool, false) | |||||
.OP_END_FACTORY_REG(MultiMerge) | .OP_END_FACTORY_REG(MultiMerge) | ||||
/** | /** | ||||
* @brief Large amount of data sort.Third operator of TopK. | |||||
* @brief MultiHeadAttention. | |||||
* @par Inputs: | * @par Inputs: | ||||
* One input, including: | |||||
* input_proposal: A Tensor. Proposal sorted for each channel. Support float16 | |||||
* thirteen input, including: | |||||
* @li query: A Tensor. Query of Attention. Support float16 | |||||
* @li key: A Tensor. Key of Attention. Support float16 | |||||
* @li value: A Tensor. Value of Attention. Support float16 | |||||
* @li query_weight: A Tensor. QueryWeight of Attention. Support float16 | |||||
* @li key_weight: A Tensor. KeyWeight of Attention. Support float16 | |||||
* @li value_weight: A Tensor. ValueWeight of Attention. Support float16 | |||||
* @li attn_mask: A Tensor. AttentionMask of Attention. Support float16 | |||||
* @li out_proj_weight: A Tensor. OutProjWeight of Attention. Support float16 | |||||
* @li query_bias: Optional Tensor. QueryBias of Attention. Support float16 | |||||
* @li key_bias: Optional Tensor. KeyBias of Attention. Support float16 | |||||
* @li value_bias: Optional Tensor. ValueBias of Attention. Support float16 | |||||
* @li out_proj_bias: Optional Tensor. OutProjBias of Attention. Support float16 | |||||
* @li dropout_mask: Optional Tensor. DropOutMask of Attention. Support uint8 \n | |||||
* @par Attributes: | * @par Attributes: | ||||
* k_num: Int.Number to be sorted. | |||||
* @li attn_head_num: Attention Head numbers, Support int | |||||
* @li attn_dim_per_head: Attention dim of a Head, Support int | |||||
* @li src_len: source length, Support int | |||||
* @li tgt_len: target length, Support int | |||||
* @li keep_prob: dropout keep probability, Support float | |||||
* @li softmax_use_float: SoftMax Use Float32 to keep precision, Support bool \n | |||||
* @par Outputs: | * @par Outputs: | ||||
* Two output, including: | |||||
* @li output_data: A Tensor. Datatype and format is same as input_data. Data sorted. | |||||
* @li output_index: A Tensor. int32. Data index. | |||||
* Eight output, including: | |||||
* @li y: A Tensor. Result of Attention. Support float16 | |||||
* @li dropout_mask: DropOutMask of Attention. Support uint8 | |||||
* @li query_res: Query Result of Attention. Support float16 | |||||
* @li key_res: Key Result of Attention. Support float16 | |||||
* @li value_res: Value Result of Attention. Support float16 | |||||
* @li attn_scores: Attention Scores of SoftMax. Support float16, float | |||||
* @li attn_res: Attention Result of SoftMax. Support float16 | |||||
* @li context: Context of Attention. Support float16 | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(SingleMerge) | |||||
.INPUT(input_proposal, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(output_data, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(output_index, TensorType({DT_INT32})) | |||||
.REQUIRED_ATTR(k_num, Int) | |||||
.OP_END_FACTORY_REG(SingleMerge) | |||||
REG_OP(MultiHeadAttention) | |||||
.INPUT(query, TensorType({DT_FLOAT16})) | |||||
.INPUT(key, TensorType({DT_FLOAT16})) | |||||
.INPUT(value, TensorType({DT_FLOAT16})) | |||||
.INPUT(query_weight, TensorType({DT_FLOAT16})) | |||||
.INPUT(key_weight, TensorType({DT_FLOAT16})) | |||||
.INPUT(value_weight, TensorType({DT_FLOAT16})) | |||||
.INPUT(attn_mask, TensorType({DT_FLOAT16})) | |||||
.INPUT(out_proj_weight, TensorType({DT_FLOAT16})) | |||||
.OPTIONAL_INPUT(query_bias, TensorType({DT_FLOAT16})) | |||||
.OPTIONAL_INPUT(key_bias, TensorType({DT_FLOAT16})) | |||||
.OPTIONAL_INPUT(value_bias, TensorType({DT_FLOAT16})) | |||||
.OPTIONAL_INPUT(out_proj_bias, TensorType({DT_FLOAT16})) | |||||
.OPTIONAL_INPUT(dropout_mask, TensorType({DT_UINT8})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(dropout_mask, TensorType({DT_UINT8})) | |||||
.OUTPUT(query_res, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(key_res, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(value_res, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(attn_scores, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.OUTPUT(attn_res, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(context, TensorType({DT_FLOAT16})) | |||||
.REQUIRED_ATTR(attn_head_num, Int) | |||||
.REQUIRED_ATTR(attn_dim_per_head, Int) | |||||
.REQUIRED_ATTR(src_len, Int) | |||||
.REQUIRED_ATTR(tgt_len, Int) | |||||
.REQUIRED_ATTR(keep_prob, Float) | |||||
.REQUIRED_ATTR(softmax_use_float, Bool) | |||||
.OP_END_FACTORY_REG(MultiHeadAttention) | |||||
/** | |||||
* @brief MultiHeadAttentionGrad. | |||||
* @par Inputs: | |||||
* fifteen inputs, including: | |||||
* @li query: A Tensor. Query of Attention. Support float16 | |||||
* @li key: A Tensor. Key of Attention. Support float16 | |||||
* @li value: A Tensor. Value of Attention. Support float16 | |||||
* @li query_weight: A Tensor. QueryWeight of Attention. Support float16 | |||||
* @li key_weight: A Tensor. KeyWeight of Attention. Support float16 | |||||
* @li value_weight: A Tensor. ValueWeight of Attention. Support float16 | |||||
* @li out_proj_weight: A Tensor. OutProjWeight of Attention. Support float16 | |||||
* @li query_res: A Tensor. Query Result of Attention. Support float16 | |||||
* @li key_res: A Tensor. Key Result of Attention. Support float16 | |||||
* @li value_res: A Tensor. Value Result of Attention. Support float16 | |||||
* @li attn_scores: A Tensor. Attention Scores of Attention. Support float16, float | |||||
* @li attn_res: A Tensor. Attention Result of Attention. Support float16 | |||||
* @li context: A Tensor. Context of Attention. Support float16 | |||||
* @li y_grad: A Tensor. Grad of Attention. Support float16 | |||||
* @li dropout_mask: A Tensor. DropOutMask of Attention. Support uint8 \n | |||||
* @par Attributes: | |||||
* @li attn_head_num: Attention Head numbers, Support int | |||||
* @li attn_dim_per_head: Attention dim of a Head, Support int | |||||
* @li src_len: source length, Support int | |||||
* @li tgt_len: target length, Support int | |||||
* @li keep_prob: dropout keep probability, Support float | |||||
* @li softmax_use_float: SoftMax Use Float32 to keep precision, Support bool | |||||
* @li bias_grad_mask: mask for attention has bias grad, Support list bool \n | |||||
* @par Outputs: | |||||
* Eleven outputs, including: | |||||
* @li query_weight_grad: QueryWeight Grad of Attention. Support float16 | |||||
* @li key_weight_grad: KeyWeight Grad of Attention. Support float16 | |||||
* @li value_weight_grad: ValueWeight Grad of Attention. Support float16 | |||||
* @li out_proj_weight_grad: OutProjWeight Grad of Attention. Support float16 | |||||
* @li query_grad: Query Grad of Attention. Support float16 | |||||
* @li key_grad: Key Grad of Attention. Support float16 | |||||
* @li value_grad: Value Grad of Attention. Support float16 | |||||
* @li query_bias_grad: QueryBias Grad of Attention. Support float16 | |||||
* @li key_bias_grad: KeyBias Grad of Attention. Support float16 | |||||
* @li value_bias_grad: ValueBias Grad of Attention. Support float16 | |||||
* @li out_proj_bias_grad: OutProjBias Grad of Attention. Support float16 | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | |||||
REG_OP(MultiHeadAttentionGrad) | |||||
.INPUT(query, TensorType({DT_FLOAT16})) | |||||
.INPUT(key, TensorType({DT_FLOAT16})) | |||||
.INPUT(value, TensorType({DT_FLOAT16})) | |||||
.INPUT(query_weight, TensorType({DT_FLOAT16})) | |||||
.INPUT(key_weight, TensorType({DT_FLOAT16})) | |||||
.INPUT(value_weight, TensorType({DT_FLOAT16})) | |||||
.INPUT(out_proj_weight, TensorType({DT_FLOAT16})) | |||||
.INPUT(query_res, TensorType({DT_FLOAT16})) | |||||
.INPUT(key_res, TensorType({DT_FLOAT16})) | |||||
.INPUT(value_res, TensorType({DT_FLOAT16})) | |||||
.INPUT(attn_scores, TensorType({DT_FLOAT16, DT_FLOAT})) | |||||
.INPUT(attn_res, TensorType({DT_FLOAT16})) | |||||
.INPUT(context, TensorType({DT_FLOAT16})) | |||||
.INPUT(y_grad, TensorType({DT_FLOAT16})) | |||||
.OPTIONAL_INPUT(dropout_mask, TensorType({DT_UINT8})) | |||||
.OUTPUT(query_weight_grad, TensorType({DT_FLOAT16})) | |||||
    .OUTPUT(key_weight_grad, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(value_weight_grad, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(out_proj_weight_grad, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(query_grad, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(key_grad, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(value_grad, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(query_bias_grad, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(key_bias_grad, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(value_bias_grad, TensorType({DT_FLOAT16})) | |||||
.OUTPUT(out_proj_bias_grad, TensorType({DT_FLOAT16})) | |||||
.REQUIRED_ATTR(attn_head_num, Int) | |||||
.REQUIRED_ATTR(attn_dim_per_head, Int) | |||||
.REQUIRED_ATTR(src_len, Int) | |||||
.REQUIRED_ATTR(tgt_len, Int) | |||||
.REQUIRED_ATTR(keep_prob, Float) | |||||
.REQUIRED_ATTR(softmax_use_float, Bool) | |||||
.REQUIRED_ATTR(bias_grad_mask, ListBool) | |||||
.OP_END_FACTORY_REG(MultiHeadAttentionGrad) | |||||
}// namespace ge | }// namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ |
@@ -81,10 +81,16 @@ REG_OP(Pooling) | |||||
*x: A tensor of type float16, float32, double . \n | *x: A tensor of type float16, float32, double . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, where N = C = 1, and H and W are positive integers within the range [1, 255]. | |||||
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. The strides of the H and W dimensions are positive integers within the range [1, 63]. | |||||
*@li padding: A required string, specifying the padding algorithm, either "VALID" or "SAME". With "SAME" means that the outputs will have the same spatial dimensions as its inputs. With "VALID" means no padding. | |||||
*@li data_format: An optional string, specifying the data format of "ksize" and "strides", either "NCHW", "NC1HWC0", or "NHWC" (default) . \n | |||||
*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, | |||||
* where N = C = 1, and H and W are positive integers within the range [1, 255]. | |||||
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. | |||||
* The strides of the N and C dimensions are 1. | |||||
* The strides of the H and W dimensions are positive integers within the range [1, 63]. | |||||
*@li padding: A required string, specifying the padding algorithm, | |||||
* either "VALID" or "SAME". With "SAME" means that the outputs will have the same spatial dimensions as its inputs. | |||||
* With "VALID" means no padding. | |||||
*@li data_format: An optional string, specifying the data format of "ksize" and "strides", | |||||
* either "NCHW", or "NHWC" (default) . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: The average pooled output tensor. Has the same type and format as input "x" . \n | *y: The average pooled output tensor. Has the same type and format as input "x" . \n | ||||
@@ -94,7 +100,8 @@ REG_OP(Pooling) | |||||
*@li Only single input and single output are supported. | *@li Only single input and single output are supported. | ||||
*@li Global pooling is supported. | *@li Global pooling is supported. | ||||
*@li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. ksize_H * ksize_W < 256 | *@li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. ksize_H * ksize_W < 256 | ||||
*@li Due to instruction restrictions, the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. | |||||
*@li Due to instruction restrictions, | |||||
* the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator AvgPool. | * Compatible with the TensorFlow operator AvgPool. | ||||
*/ | */ | ||||
@@ -114,11 +121,18 @@ REG_OP(AvgPool) | |||||
*x: A tensor of type float16, float32, double. | *x: A tensor of type float16, float32, double. | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, where N = C = 1, and H and W are positive integers within the range [1, 255]. | |||||
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. The strides of the H and W dimensions are positive integers within the range [1, 63]. | |||||
*@li padding_mode: A required string, specifying the padding algorithm, either "VALID", "SAME" and "CALCULATED". With "SAME" means that the outputs will have the same spatial dimensions as its inputs. With "VALID" means no padding. | |||||
*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, | |||||
* where N = C = 1, and H and W are positive integers within the range [1, 255]. | |||||
*@li strides: A required list of 4 ints, specifying the stride of the sliding window. | |||||
* The strides of the N and C dimensions are 1. | |||||
* The strides of the H and W dimensions are positive integers within the range [1, 63]. | |||||
*@li padding_mode: A required string, specifying the padding algorithm, | |||||
* either "VALID", "SAME" and "CALCULATED". | |||||
* With "SAME" means that the outputs will have the same spatial dimensions as its inputs. | |||||
* With "VALID" means no padding. | |||||
*@li pads: Pad value when padding_mode is "CALCULATED". | *@li pads: Pad value when padding_mode is "CALCULATED". | ||||
*@li data_format: An optional string, specifying the data format of "ksize" and "strides", either "NCHW", "NC1HWC0", or "NHWC" (default). | |||||
*@li data_format: An optional string, specifying the data format of "ksize" and "strides", | |||||
* either "NCHW", or "NHWC" (default). | |||||
*@li global_pooling: Global or not. If true, pads will change to {0,0,0,0} and ksize will change to [input_h, input_w] | *@li global_pooling: Global or not. If true, pads will change to {0,0,0,0} and ksize will change to [input_h, input_w] | ||||
*@li ceil_mode: Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". | *@li ceil_mode: Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". | ||||
*@li exclusive: Ignore padding area or not when calculating average. | *@li exclusive: Ignore padding area or not when calculating average. | ||||
@@ -130,7 +144,8 @@ REG_OP(AvgPool) | |||||
*@li Only single input and single output are supported. | *@li Only single input and single output are supported. | ||||
*@li Global pooling is supported. | *@li Global pooling is supported. | ||||
*@li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. ksize_H * ksize_W < 256 | *@li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. ksize_H * ksize_W < 256 | ||||
*@li Due to instruction restrictions, the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. | |||||
*@li Due to instruction restrictions, | |||||
* the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator AvgPoolV2. | * Compatible with the TensorFlow operator AvgPoolV2. | ||||
*/ | */ | ||||
@@ -310,21 +325,24 @@ REG_OP(AvgPool3DGradD) | |||||
*@par Inputs: | *@par Inputs: | ||||
* One input: | * One input: | ||||
*x: An NC1HWC0 Tensor of type float16. | |||||
*x: A Tensor of type float16. | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. | |||||
*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. | |||||
*@li ksize: A required list of int8, int16, int32, or int64 values, | |||||
* specifying the size of the window for each dimension of the input tensor. No default value. | |||||
*@li strides: A required list of int8, int16, int32, or int64 values, | |||||
* specifying the stride of the sliding window for each dimension of the input tensor. No default value. | |||||
*@li padding: A required string. No default value. | *@li padding: A required string. No default value. | ||||
*@li data_format: An optional string. Defaults to "NC1HWC0" . \n | |||||
*@li data_format: An optional string . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type and format as input "x" . \n | *y: A Tensor. Has the same type and format as input "x" . \n | ||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | ||||
*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. | |||||
*@li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1, | |||||
* strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. | |||||
*@li "padding" is either "SAME" or "VALID" . \n | *@li "padding" is either "SAME" or "VALID" . \n | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
@@ -348,7 +366,7 @@ REG_OP(MaxPoolExt2) | |||||
*@par Inputs: | *@par Inputs: | ||||
* One input: | * One input: | ||||
*x: An NC1HWC0 Tensor. Supported type:float16, float32, double, int8, int16, | |||||
*x: A Tensor. Supported type:float16, float32, double, int8, int16, | |||||
* int32, int64, uint8, uint16, qint8 | * int32, int64, uint8, uint16, qint8 | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -391,7 +409,7 @@ REG_OP(MaxPool) | |||||
*@brief Performs max 3d pooling on the input . \n | *@brief Performs max 3d pooling on the input . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*x: An NC1HWC0 Tensor. Supported type float16, float32, double . \n | |||||
*x: A Tensor. Supported type float16, float32, double . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li ksize: A required list of int8, int16, int32, or int64 values, | *@li ksize: A required list of int8, int16, int32, or int64 values, | ||||
@@ -457,7 +475,6 @@ REG_OP(MaxPool3D) | |||||
* y: An 6D tensor. the maxpool3d output(max value), format as NDoC1HoWoC0. | * y: An 6D tensor. the maxpool3d output(max value), format as NDoC1HoWoC0. | ||||
* @par Outputs: | * @par Outputs: | ||||
* argmax: A 5D uint16 tensor. the indice output. | * argmax: A 5D uint16 tensor. the indice output. | ||||
* format as NC1HWC0, actually it represent N, Do, C1*ksize, Ho*Wo//16, 16. | |||||
*/ | */ | ||||
REG_OP(MaxPool3DWithArgmax) | REG_OP(MaxPool3DWithArgmax) | ||||
.INPUT(x, TensorType::RealNumberType()) | .INPUT(x, TensorType::RealNumberType()) | ||||
@@ -546,9 +563,9 @@ REG_OP(MaxPool3DGradGrad) | |||||
* @brief Computes gradients of the maxpooling function . \n | * @brief Computes gradients of the maxpooling function . \n | ||||
* @par Inputs: | * @par Inputs: | ||||
* @li x1: A mutable NC1HWC0 tensor of type RealNumberType. | |||||
* @li x2: A mutable NC1HWC0 tensor of type RealNumberType. | |||||
* @li grad: A mutable NC1HWC0 tensor of type RealNumberType . \n | |||||
* @li x1: A mutable tensor of type RealNumberType. | |||||
* @li x2: A mutable tensor of type RealNumberType. | |||||
* @li grad: A mutable tensor of type RealNumberType . \n | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li ksize: A required tuple or list, specifying the size of the window for | * @li ksize: A required tuple or list, specifying the size of the window for | ||||
@@ -630,21 +647,24 @@ REG_OP(MaxPoolGradGrad) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Three inputs: | * Three inputs: | ||||
*@li x: An NC1HWC0 Tensor of type float16. | |||||
*@li strides: A required type of int32 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. | |||||
*@li ksize: A required type of int32 values, specifying the size of the window for each dimension of the input tensor. No default value. | |||||
*@li x: A Tensor of type float16. | |||||
*@li strides: A required type of int32 values, | |||||
* specifying the stride of the sliding window for each dimension of the input tensor. No default value. | |||||
*@li ksize: A required type of int32 values, | |||||
* specifying the size of the window for each dimension of the input tensor. No default value. | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li padding: A required string. No default value. | *@li padding: A required string. No default value. | ||||
*@li data_format: An optional string. Defaults to "NC1HWC0" . \n | |||||
*@li data_format: An optional string. \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type and format as input "x" . \n | *y: A Tensor. Has the same type and format as input "x" . \n | ||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | ||||
*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. | |||||
*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, | |||||
* strides[2] <= 63, strides[2] >= 1. | |||||
*@li "padding" is either "SAME" or "VALID" . \n | *@li "padding" is either "SAME" or "VALID" . \n | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
@@ -713,7 +733,7 @@ REG_OP(MaxPoolWithArgmax) | |||||
*@li grad: An 4d tensor. Supported type: float, double, int32, | *@li grad: An 4d tensor. Supported type: float, double, int32, | ||||
* uint8, int16, int8, int64, uint16, half, uint32, uint64. | * uint8, int16, int8, int64, uint16, half, uint32, uint64. | ||||
* Must set the format, supported format list ["NCHW, NHWC"] | * Must set the format, supported format list ["NCHW, NHWC"] | ||||
*@li argmax: An NC1HWC0 tensor of type int32 or int64 . \n | |||||
*@li argmax: A tensor of type int32 or int64 . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li ksize: A required list of int8, int16, int32, or int64 values, | *@li ksize: A required list of int8, int16, int32, or int64 values, | ||||
@@ -753,8 +773,8 @@ REG_OP(MaxPoolGradWithArgmax) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Two inputs: | * Two inputs: | ||||
*@li x: An NC1HWC0 Tensor of type float16. | |||||
*@li mask: An NC1HWC0 Tensor of type uint16 . \n | |||||
*@li x: A Tensor of type float16. | |||||
*@li mask: A Tensor of type uint16 . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. | *@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. | ||||
@@ -763,7 +783,7 @@ REG_OP(MaxPoolGradWithArgmax) | |||||
*@li originshape:A required list of int8, int16, int32, or int64 values, No default value. \n | *@li originshape:A required list of int8, int16, int32, or int64 values, No default value. \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*argmax: An NC1HWC0 Tensor of type int32 . \n | |||||
*argmax: A Tensor of type int32 . \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | ||||
@@ -1314,7 +1334,7 @@ REG_OP(AvgPool1DD) | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type and format as input "x". | *y: A Tensor. Has the same type and format as input "x". | ||||
*argmax: A Tensor. type:uint16, format:NC1HWC0. | |||||
*argmax: A Tensor. type:uint16. | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | ||||
*@li "strides is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, | *@li "strides is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, | ||||
@@ -1388,7 +1408,7 @@ REG_OP(MaxPoolGradWithArgmaxV2) | |||||
* @par Inputs: | * @par Inputs: | ||||
* One input: | * One input: | ||||
* x: An NC1HWC0 Tensor. Supported type:float16, float32, double, int32, int64, | |||||
* x: A Tensor. Supported type:float16, float32, double, int32, int64, | |||||
* uint8, int16, int8, uint16, qint8 | * uint8, int16, int8, uint16, qint8 | ||||
* @par Attributes: | * @par Attributes: | ||||
@@ -1400,9 +1420,8 @@ REG_OP(MaxPoolGradWithArgmaxV2) | |||||
* the input tensor. No default value. | * the input tensor. No default value. | ||||
* @li padding_mode: A required string. Defaults to "CALCULATED". | * @li padding_mode: A required string. Defaults to "CALCULATED". | ||||
* @li pads:A required list of int8, int16, int32, or int64 values, | * @li pads:A required list of int8, int16, int32, or int64 values, | ||||
* a data to caculate when padding_mode is "CALCULATED". | |||||
* a data to calculate when padding_mode is "CALCULATED". | |||||
* @li data_format: An optional string. Defaults to "NHWC" . | * @li data_format: An optional string. Defaults to "NHWC" . | ||||
* If data_format = "NC1HWC0", ori_format must be "NCHW". | |||||
* @li global_pooling bool, Whether to use the global pooling. | * @li global_pooling bool, Whether to use the global pooling. | ||||
* If global_pooling = true, kernel size and paddings will be ignored. | * If global_pooling = true, kernel size and paddings will be ignored. | ||||
* Default False | * Default False | ||||
@@ -1418,7 +1437,7 @@ REG_OP(MaxPoolGradWithArgmaxV2) | |||||
* ksize[1] * ksize[2] <= 255. | * ksize[1] * ksize[2] <= 255. | ||||
* @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, | * @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, | ||||
* strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. | * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. | ||||
* @li "padding" is "SAME" "VALID" or "CACULATE" . | |||||
* @li "padding" is "SAME" "VALID" or "CALCULATE" . | |||||
* @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
@@ -1440,9 +1459,9 @@ REG_OP(MaxPoolV3) | |||||
* @brief Computes gradients of the maxpooling function . \n | * @brief Computes gradients of the maxpooling function . \n | ||||
* @par Inputs: | * @par Inputs: | ||||
* @li orig_input: A mutable NC1HWC0 tensor of type RealNumberType. | |||||
* @li orig_output: A mutable NC1HWC0 tensor of type RealNumberType. | |||||
* @li grad: A mutable NC1HWC0 tensor of type RealNumberType . \n | |||||
* @li orig_input: A mutable tensor of type RealNumberType. | |||||
* @li orig_output: A mutable tensor of type RealNumberType. | |||||
* @li grad: A mutable tensor of type RealNumberType . \n | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li ksize: A required list of int8, int16, int32, or int64 values, | * @li ksize: A required list of int8, int16, int32, or int64 values, | ||||
@@ -1650,9 +1669,9 @@ REG_OP(AdaptiveAvgPool2dGrad) | |||||
* @par Inputs: | * @par Inputs: | ||||
* Three inputs, including: | * Three inputs, including: | ||||
* @li x: An NC1HWC0 tensor of type float16. | |||||
* @li grad: An NC1HWC0 tensor of type float16. | |||||
* @li argmax: An NC1HWC0 tensor of type uint16 or int64. \n | |||||
* @li x: A tensor of type float16. | |||||
* @li grad: A tensor of type float16. | |||||
* @li argmax: A tensor of type uint16 or int64. \n | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | * @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | ||||
@@ -1665,11 +1684,11 @@ REG_OP(AdaptiveAvgPool2dGrad) | |||||
* y: A Tensor. Has the same type and format as input "x". \n | * y: A Tensor. Has the same type and format as input "x". \n | ||||
* @attention Constraints: | * @attention Constraints: | ||||
* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | |||||
* @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1 | |||||
* @li "pads" is listint. | |||||
* @li "ceil_mode" defaults to False. | |||||
* @li "data_format" defaults to "NC1HWC0". \n | |||||
* @li ksize: is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | |||||
* @li strides: is a list that has length 4: strides[0] = 1 or strides[3] = 1 | |||||
* @li pads: listint. | |||||
* @li ceil_mode: defaults to False. | |||||
* @li data_format: An optional string. \n | |||||
* @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV1. | * Compatible with the TensorFlow operator MaxPoolGradWithArgmaxV1. | ||||
@@ -1693,7 +1712,7 @@ REG_OP(MaxPoolGradWithArgmaxV1) | |||||
* @par Inputs: | * @par Inputs: | ||||
* One input: | * One input: | ||||
* x: An NC1HWC0 Tensor of type float16. \n | |||||
* x: A Tensor of type float16. \n | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | * @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for | ||||
@@ -1704,15 +1723,15 @@ REG_OP(MaxPoolGradWithArgmaxV1) | |||||
* @par Outputs: | * @par Outputs: | ||||
* y: A Tensor. Has the same type and format as input "x". | * y: A Tensor. Has the same type and format as input "x". | ||||
* argmax: A Tensor. type:uint16, format:NC1HWC0. \n | |||||
* argmax: A Tensor. type:uint16. \n | |||||
* @attention Constraints: | * @attention Constraints: | ||||
* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | |||||
* @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, | |||||
* @li ksize: a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. | |||||
* @li stride: a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, | |||||
* strides[2] <= 63, strides[2] >= 1. | * strides[2] <= 63, strides[2] >= 1. | ||||
* @li "pads" is listint. | |||||
* @li "ceil_mode" defaults to False. | |||||
* @li "data_format" defaults to "NC1HWC0". \n | |||||
* @li pads: listint. | |||||
* @li ceil_mode: defaults to False. | |||||
* @li data_format: An optional string. \n | |||||
* @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator MaxPoolWithArgmaxV1. | * Compatible with the TensorFlow operator MaxPoolWithArgmaxV1. | ||||
@@ -423,8 +423,8 @@ REG_OP(Softplus) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Two inputs: | *Two inputs: | ||||
* @li gradients: An NC1HWC0 or ND Tensor of type float16 or float32. | |||||
* @li features: An NC1HWC0 or ND Tensor of type float16 or float32. | |||||
* @li gradients: An ND Tensor of type float16 or float32. | |||||
* @li features: An ND Tensor of type float16 or float32. | |||||
*@par Outputs: | *@par Outputs: | ||||
@@ -458,15 +458,34 @@ REG_OP(Softsign) | |||||
.OP_END_FACTORY_REG(Softsign) | .OP_END_FACTORY_REG(Softsign) | ||||
/** | /** | ||||
* @brief Computes softsignGrad: gradients / (1 + abs(features)) ** 2 . | |||||
* | |||||
* @par Inputs: | |||||
* Two inputs, including: | |||||
* @li gradients: A Tensor. Must be one of the following types: float16, float32. | |||||
* @li features: A Tensor of the same type and shape as "gradients". | |||||
* @par Outputs: | |||||
* output:A Tensor. Has the same type as "gradients". | |||||
* @par Third-party framework compatibility | |||||
* Compatible with the TensorFlow operator SoftsignGrad. | |||||
*/ | |||||
REG_OP(SoftsignGrad) | |||||
.INPUT(gradients, TensorType::FloatingDataType()) | |||||
.INPUT(features, TensorType::FloatingDataType()) | |||||
.OUTPUT(output, TensorType::FloatingDataType()) | |||||
.OP_END_FACTORY_REG(SoftsignGrad) | |||||
/** | |||||
*@brief Computes scaled exponential linear: scale * alpha * (exp(x) - 1) . \n | *@brief Computes scaled exponential linear: scale * alpha * (exp(x) - 1) . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
* One input: | * One input: | ||||
*x: A Tensor. Must be one of the following types: float16, float, double | *x: A Tensor. Must be one of the following types: float16, float, double | ||||
* int32, int8. format:ND, NC1HWC0 . \n | |||||
* int32, int8. format:ND. \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type and format as input "x". format:ND, NC1HWC0 . \n | |||||
*y: A Tensor. Has the same type and format as input "x". format:ND. \n | |||||
*@see Region() | *@see Region() | ||||
@@ -481,6 +500,28 @@ REG_OP(Selu) | |||||
.OP_END_FACTORY_REG(Selu) | .OP_END_FACTORY_REG(Selu) | ||||
/** | /** | ||||
*@brief Computes SeluGrad backprops: gradients * (outputs + scale * alpha) | |||||
* if outputs < 0, scale * gradients otherwise . | |||||
*@par Inputs: | |||||
* Two inputs, including: | |||||
*@li gradients: A Tensor. Must be one of the following types: float32, float16, | |||||
* int32, int8, uint8 | |||||
*@li outputs: A Tensor. Must be one of the following types: float32, float16, | |||||
* int32, int8, uint8 | |||||
*@par Outputs: | |||||
*y: A Tensor. Must have the same type as "gradients" . | |||||
*@par Third-party framework compatibility | |||||
* Compatible with the TensorFlow operator SeluGrad. | |||||
*/ | |||||
REG_OP(SeluGrad) | |||||
.INPUT(gradients, TensorType::RealNumberType()) | |||||
.INPUT(outputs, TensorType::RealNumberType()) | |||||
.OUTPUT(y, TensorType::RealNumberType()) | |||||
.OP_END_FACTORY_REG(SeluGrad) | |||||
/** | |||||
*@brief Computes rectified linear gradients for a ReLU operation . \n | *@brief Computes rectified linear gradients for a ReLU operation . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
@@ -246,14 +246,14 @@ REG_OP(AscendRequantS16) | |||||
* @brief Quantizes the input of int8 . \n | * @brief Quantizes the input of int8 . \n | ||||
* @par Inputs: | * @par Inputs: | ||||
* @li x: An FRACTAL_Z tensor of type int8, specifying the input. | |||||
* @li offset: An FRACTAL_Z tensor of type int8. | |||||
* @li x: A tensor of type int8, specifying the input. | |||||
* @li offset: A tensor of type int8. | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li dst_type: An optional int from: DT_INT8, DT_INT4. Defaults to DT_INT8. | * @li dst_type: An optional int from: DT_INT8, DT_INT4. Defaults to DT_INT8. | ||||
* @par Outputs: | * @par Outputs: | ||||
* @li y: output tensor of type int4 or int8 and with format FRACTAL_Z. | |||||
* @li y: output tensor of type int4 or int8. | |||||
* @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
* It is a custom operator. It has no corresponding operator in Caffe, Onnx, Tensorflow or PyTorch. | * It is a custom operator. It has no corresponding operator in Caffe, Onnx, Tensorflow or PyTorch. | ||||
@@ -28,7 +28,7 @@ namespace ge { | |||||
*@brief Performs reduced batch normalization . \n | *@brief Performs reduced batch normalization . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . \n | |||||
*x: A tensor of type float16 or float32. \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*@li sum: A 1D Tensor of type float32 for SUM reduced "x". | *@li sum: A 1D Tensor of type float32 for SUM reduced "x". | ||||
@@ -49,11 +49,11 @@ REG_OP(BNTrainingReduce) | |||||
*@brief Performs reduced batch normalization . \n | *@brief Performs reduced batch normalization . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*x: A 6D Tensor of type float16 or float32, with format NDC1HWC0 . \n | |||||
*x: A tensor of type float16 or float32. \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*@li sum: A 3D Tensor of type float32 for SUM reduced "x". | |||||
*@li square_sum: A 3D Tensor of type float32 for SUMSQ reduced "x" . \n | |||||
*@li sum: A tensor of type float32 for SUM reduced "x". | |||||
*@li square_sum: A tensor of type float32 for SUMSQ reduced "x" . \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
* This operator is a BatchNorm fusion operator for updating the moving | * This operator is a BatchNorm fusion operator for updating the moving | ||||
@@ -71,17 +71,17 @@ REG_OP(BN3DTrainingReduce) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Seven inputs, including: | * Seven inputs, including: | ||||
*@li grads: A 5D Tensor of type float16 or float32, with format NC1HWC0, for | |||||
*@li grads: A tensor of type float16 or float32, for | |||||
* the gradient. | * the gradient. | ||||
*@li x: A 5D Tensor of type float16 or float32, with format NC1HWC0. | |||||
*@li diff_scale: A 5D Tensor of type float32, with format NC1HWC0, | |||||
*@li x: A tensor of type float16 or float32. | |||||
*@li diff_scale: A tensor of type float32, | |||||
* for the mean of "x". | * for the mean of "x". | ||||
*@li diff_offset: A 5D Tensor of type float32, with format NC1HWC0, | |||||
*@li diff_offset: A tensor of type float32, | |||||
* for the variance of "x". | * for the variance of "x". | ||||
*@li scale: A 5D Tensor of type float32, with format NC1HWC0. | |||||
*@li batch_mean: A 5D Tensor of type float32, with format NC1HWC0, | |||||
*@li scale: A tensor of type float32. | |||||
*@li batch_mean: A tensor of type float32, | |||||
* for the mean of "x". | * for the mean of "x". | ||||
*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0, | |||||
*@li batch_variance: A tensor of type float32, | |||||
* for the variance of "x" . \n | * for the variance of "x" . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -89,7 +89,7 @@ REG_OP(BN3DTrainingReduce) | |||||
* added to the variance of "x" . \n | * added to the variance of "x" . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor of type float16 or float32, with format NC1HWC0, for the offset | |||||
*y: A Tensor of type float16 or float32, for the offset | |||||
* of "x" . \n | * of "x" . \n | ||||
*@attention Constraints: | *@attention Constraints: | ||||
@@ -114,17 +114,17 @@ REG_OP(BNTrainingReduceGrad) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Seven inputs, including: | * Seven inputs, including: | ||||
*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, for | |||||
*@li grads: A tensor of type float16 or float32, for | |||||
* the gradient. | * the gradient. | ||||
*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0. | |||||
*@li diff_scale: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
*@li x: A tensor of type float16 or float32. | |||||
*@li diff_scale: A tensor of type float32, | |||||
* for the mean of "x". | * for the mean of "x". | ||||
*@li diff_offset: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
*@li diff_offset: A tensor of type float32, | |||||
* for the variance of "x". | * for the variance of "x". | ||||
*@li scale: A 6D Tensor of type float32, with format NDC1HWC0. | |||||
*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
*@li scale: A tensor of type float32. | |||||
*@li batch_mean: A tensor of type float32, | |||||
* for the mean of "x". | * for the mean of "x". | ||||
*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
*@li batch_variance: A tensor of type float32, | |||||
* for the variance of "x" . \n | * for the variance of "x" . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -132,7 +132,7 @@ REG_OP(BNTrainingReduceGrad) | |||||
* added to the variance of "x" . \n | * added to the variance of "x" . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor of type float16 or float32, with format NDC1HWC0, for the offset | |||||
*y: A Tensor of type float16 or float32, for the offset | |||||
* of "x" . \n | * of "x" . \n | ||||
*@attention Constraints: | *@attention Constraints: | ||||
@@ -156,8 +156,8 @@ REG_OP(BN3DTrainingReduceGrad) | |||||
*@brief Performs reduced batch normalization . \n | *@brief Performs reduced batch normalization . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
* Seven inputs, including: (NC1HWC0 supported) | |||||
*@li x: A 5D Tensor of type float16 or float32. | |||||
* Seven inputs, including: | |||||
*@li x: A tensor of type float16 or float32. | |||||
*@li sum: A 1D Tensor of type float32 for the output of operator | *@li sum: A 1D Tensor of type float32 for the output of operator | ||||
* BNTrainingReduce. | * BNTrainingReduce. | ||||
*@li square_sum: A 1D Tensor of type float32 for the output of operator | *@li square_sum: A 1D Tensor of type float32 for the output of operator | ||||
@@ -174,10 +174,10 @@ REG_OP(BN3DTrainingReduceGrad) | |||||
* and variance . \n | * and variance . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
* Five outputs, including: (NC1HWC0 supported) | |||||
*@li y: A 5D Tensor of type float16 or float32, for normalized "x". | |||||
*@li mean: A 5D Tensor of type float32, for the updated mean. | |||||
*@li variance: A 5D Tensor of type float32, for the updated variance. | |||||
* Five outputs, including: | |||||
*@li y: A tensor of type float16 or float32, for normalized "x". | |||||
*@li mean: A tensor of type float32, for the updated mean. | |||||
*@li variance: A tensor of type float32, for the updated variance. | |||||
*@li batch_mean: A 1D Tensor of type float32, for the mean of "x". | *@li batch_mean: A 1D Tensor of type float32, for the mean of "x". | ||||
*@li batch_variance: A 1D Tensor of type float32, for the variance of "x" . \n | *@li batch_variance: A 1D Tensor of type float32, for the variance of "x" . \n | ||||
@@ -209,16 +209,16 @@ REG_OP(BNTrainingUpdate) | |||||
*@brief Performs reduced batch normalization . \n | *@brief Performs reduced batch normalization . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
* Seven inputs, including: (NDC1HWC0 supported) | |||||
*@li x: A 6D Tensor of type float16 or float32. | |||||
*@li sum: A 6D Tensor of type float32 for the output of operator | |||||
* Seven inputs, including: | |||||
*@li x: A tensor of type float16 or float32. | |||||
*@li sum: A tensor of type float32 for the output of operator | |||||
* BN3DTrainingUpdate. | * BN3DTrainingUpdate. | ||||
*@li square_sum: A 6D Tensor of type float32 for the output of operator | |||||
*@li square_sum: A tensor of type float32 for the output of operator | |||||
* BN3DTrainingUpdate. | * BN3DTrainingUpdate. | ||||
*@li scale: A 6D Tensor of type float32, for the scaling factor. | |||||
*@li offset: A 6D Tensor of type float32, for the scaling offset. | |||||
*@li mean: A 6D Tensor of type float32, for the updated mean. | |||||
*@li variance: A 6D Tensor of type float32, for the updated variance . \n | |||||
*@li scale: A tensor of type float32, for the scaling factor. | |||||
*@li offset: A tensor of type float32, for the scaling offset. | |||||
*@li mean: A tensor of type float32, for the updated mean. | |||||
*@li variance: A tensor of type float32, for the updated variance . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li epsilon: A required float32, specifying the small value added to variance | *@li epsilon: A required float32, specifying the small value added to variance | ||||
@@ -227,12 +227,12 @@ REG_OP(BNTrainingUpdate) | |||||
* and variance . \n | * and variance . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
* Five outputs, including: (NDC1HWC0 supported) | |||||
*@li y: A 6D Tensor of type float16 or float32, for normalized "x". | |||||
*@li mean: A 6D Tensor of type float32, for the updated mean. | |||||
*@li variance: A 6D Tensor of type float32, for the updated variance. | |||||
*@li batch_mean: A 6D Tensor of type float32, for the mean of "x". | |||||
*@li batch_variance: A 6D Tensor of type float32, for the variance of "x" . \n | |||||
* Five outputs, including: | |||||
*@li y: A tensor of type float16 or float32, for normalized "x". | |||||
*@li mean: A tensor of type float32, for the updated mean. | |||||
*@li variance: A tensor of type float32, for the updated variance. | |||||
*@li batch_mean: A tensor of type float32, for the mean of "x". | |||||
*@li batch_variance: A tensor of type float32, for the variance of "x" . \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li This operator is a BatchNorm fusion operator for updating the moving | *@li This operator is a BatchNorm fusion operator for updating the moving | ||||
@@ -262,19 +262,19 @@ REG_OP(BN3DTrainingUpdate) | |||||
*@brief Performs batch normalization for inference . \n | *@brief Performs batch normalization for inference . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
* Five inputs, including: (NC1HWC0 supported) | |||||
*@li x: A 5D Tensor of type float16 or float32. | |||||
*@li scale: A 5D Tensor of type float32, for the scaling factor. | |||||
*@li offset: A 5D Tensor of type float32, for the scaling offset. | |||||
*@li mean: A 5D Tensor of type float32, for the mean. | |||||
*@li variance: A 5D Tensor of type float32, for the variance . \n | |||||
* Five inputs, including: | |||||
*@li x: A tensor of type float16 or float32. | |||||
*@li scale: A tensor of type float32, for the scaling factor. | |||||
*@li offset: A tensor of type float32, for the scaling offset. | |||||
*@li mean: A tensor of type float32, for the mean. | |||||
*@li variance: A tensor of type float32, for the variance . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*epsilon: An optional float32, specifying the small value added to variance to | *epsilon: An optional float32, specifying the small value added to variance to | ||||
* avoid dividing by zero. Defaults to "0.0001" . \n | * avoid dividing by zero. Defaults to "0.0001" . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A 5D Tensor of type float16 or float32 for the normalized "x" . \n | |||||
*y: A tensor of type float16 or float32 for the normalized "x" . \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*For Ascend 310, the result accuracy fails to reach 1/1000 due to the square root | *For Ascend 310, the result accuracy fails to reach 1/1000 due to the square root | ||||
@@ -295,21 +295,21 @@ REG_OP(BNInfer) | |||||
assignmoving average . \n | assignmoving average . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*Five inputs, including: (NC1HWC0 supported) | |||||
*@li x: A 5D Tensor of type float16 or float32. | |||||
*@li sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce. | |||||
*@li square_sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce. | |||||
*@li scale: A 5D Tensor of type float32, for the scaling factor. | |||||
*@li offset: A 5D Tensor of type float32, for the scaling offset . \n | |||||
*Five inputs, including: | |||||
*@li x: A tensor of type float16 or float32. | |||||
*@li sum: A tensor of type float32 for the output of operator BNTrainingReduce. | |||||
*@li square_sum: A tensor of type float32 for the output of operator BNTrainingReduce. | |||||
*@li scale: A tensor of type float32, for the scaling factor. | |||||
*@li offset: A tensor of type float32, for the scaling offset . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n | *epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*Three outputs, including: (NC1HWC0 supported) | |||||
*@li y: A 5D Tensor of type float16 or float32, for normalized "x". | |||||
*@li batch_mean: A 5D Tensor of type float32, for the mean of "x". | |||||
*@li batch_variance: A 5D Tensor of type float32, for the variance of "x" . \n | |||||
*Three outputs, including: | |||||
*@li y: A tensor of type float16 or float32, for normalized "x". | |||||
*@li batch_mean: A tensor of type float32, for the mean of "x". | |||||
*@li batch_variance: A tensor of type float32, for the variance of "x" . \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*This operator is used in conjunction with BNTrainingReduce. | *This operator is used in conjunction with BNTrainingReduce. | ||||
@@ -332,22 +332,22 @@ REG_OP(BNTrainingUpdateV2) | |||||
assign moving average . \n | assign moving average . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
* Five inputs, including: (NC1HWC0 supported) | |||||
*@li x: A 5D Tensor of type float16 or float32. | |||||
*@li sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce. | |||||
*@li square_sum: A 5D Tensor of type float32 for the output of operator BNTrainingReduce. | |||||
*@li scale: A 5D Tensor of type float32, for the scaling factor. | |||||
*@li offset: A 5D Tensor of type float32, for the scaling offset . \n | |||||
* Five inputs, including: | |||||
*@li x: A tensor of type float16 or float32. | |||||
*@li sum: A tensor of type float32 for the output of operator BNTrainingReduce. | |||||
*@li square_sum: A tensor of type float32 for the output of operator BNTrainingReduce. | |||||
*@li scale: A tensor of type float32, for the scaling factor. | |||||
*@li offset: A tensor of type float32, for the scaling offset . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n | *epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*@li y: A 5D Tensor of type float16 or float32, for normalized "x". | |||||
*@li batch_mean: A 5D Tensor of type float32, for the mean of "x". | |||||
*@li batch_variance: A 5D Tensor of type float32, for the variance of "x". | |||||
*@li reserve_1: A 5D Tensor of type float32, for the mean of batch "x". Has the same type as batch_mean. | |||||
*@li reserve_2: A 5D Tensor of type float32, for the variance of batch "x". Has the same type as batch_mean . \n | |||||
*@li y: A tensor of type float16 or float32, for normalized "x". | |||||
*@li batch_mean: A tensor of type float32, for the mean of "x". | |||||
*@li batch_variance: A tensor of type float32, for the variance of "x". | |||||
*@li reserve_1: A tensor of type float32, for the mean of batch "x". Has the same type as batch_mean. | |||||
*@li reserve_2: A tensor of type float32, for the variance of batch "x". Has the same type as batch_mean . \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li This operator is used in conjunction with BNTrainingReduce. | *@li This operator is used in conjunction with BNTrainingReduce. | ||||
@@ -372,12 +372,12 @@ REG_OP(BNTrainingUpdateV3) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Four inputs, including: | * Four inputs, including: | ||||
*@li grads: A 5D Tensor of type float16 or float32, with format NC1HWC0, | |||||
*@li grads: A tensor of type float16 or float32, | |||||
* for the gradient. | * for the gradient. | ||||
*@li x: A 5D Tensor of type float16 or float32, with format NC1HWC0. | |||||
*@li batch_mean: A 5D Tensor of type float32, with format NC1HWC0, | |||||
*@li x: A tensor of type float16 or float32. | |||||
*@li batch_mean: A tensor of type float32, | |||||
* for the mean of "x". | * for the mean of "x". | ||||
*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0, | |||||
*@li batch_variance: A tensor of type float32, | |||||
* for the variance of "x" . \n | * for the variance of "x" . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -385,9 +385,9 @@ REG_OP(BNTrainingUpdateV3) | |||||
* added to the variance of "x" . \n | * added to the variance of "x" . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*@li diff_scale: A Tensor of type float32, with format NC1HWC0, | |||||
*@li diff_scale: A Tensor of type float32, | |||||
* for the offset of "scale". | * for the offset of "scale". | ||||
*@li diff_offset: A Tensor of type float32, with format NC1HWC0, | |||||
*@li diff_offset: A Tensor of type float32, | |||||
* for the offset of "offset" . \n | * for the offset of "offset" . \n | ||||
*/ | */ | ||||
@@ -406,12 +406,12 @@ REG_OP(BNTrainingUpdateGrad) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Four inputs, including: | * Four inputs, including: | ||||
*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, | |||||
*@li grads: A tensor of type float16 or float32, | |||||
* for the gradient. | * for the gradient. | ||||
*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0. | |||||
*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
*@li x: A tensor of type float16 or float32. | |||||
*@li batch_mean: A tensor of type float32, | |||||
* for the mean of "x". | * for the mean of "x". | ||||
*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0, | |||||
*@li batch_variance: A tensor of type float32, | |||||
* for the variance of "x" . \n | * for the variance of "x" . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -419,9 +419,9 @@ REG_OP(BNTrainingUpdateGrad) | |||||
* added to the variance of "x" . \n | * added to the variance of "x" . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*@li diff_scale: A Tensor of type float32, with format NDC1HWC0, | |||||
*@li diff_scale: A Tensor of type float32, | |||||
* for the offset of "scale". | * for the offset of "scale". | ||||
*@li diff_offset: A Tensor of type float32, with format NDC1HWC0, | |||||
*@li diff_offset: A Tensor of type float32, | |||||
* for the offset of "offset" . \n | * for the offset of "offset" . \n | ||||
*/ | */ | ||||
@@ -440,15 +440,15 @@ REG_OP(BN3DTrainingUpdateGrad) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Three inputs, including: | * Three inputs, including: | ||||
*@li grads: A 5D Tensor of type float16 or float32, with format NC1HWC0, for the gradient. | |||||
*@li scale: A 5D Tensor of type float32, with format NC1HWC0. | |||||
*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0. It is an output of BatchNorm . \n | |||||
*@li grads: A tensor of type float16, for the gradient. | |||||
*@li scale: A tensor of type float32. | |||||
*@li batch_variance: A tensor of type float32. It is an output of BatchNorm . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x" . \n | *epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x" . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*x_backprop: A Tensor of type float16 or float32, with format NC1HWC0, for the offset of "x" . \n | |||||
*x_backprop: A Tensor of type float16 or float32, for the offset of "x" . \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
* The preceding layer of this operator must be operator BatchNorm. | * The preceding layer of this operator must be operator BatchNorm. | ||||
@@ -690,6 +690,9 @@ REG_OP(ReduceMean) | |||||
*@li keep_dims: A bool or NoneType. | *@li keep_dims: A bool or NoneType. | ||||
* - If true, retains reduced dimensions with length 1. | * - If true, retains reduced dimensions with length 1. | ||||
* - If false, the rank of the tensor is reduced by 1 for each entry in axis. | * - If false, the rank of the tensor is reduced by 1 for each entry in axis. | ||||
*@li keep_dims: A bool default True. | |||||
* - If true, same as tf. | |||||
* - If false, when x's shape is [], reduce all dims, for onnx. | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type as "x" . \n | *y: A Tensor. Has the same type as "x" . \n | ||||
@@ -704,6 +707,7 @@ REG_OP(ReduceMeanD) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.REQUIRED_ATTR(axes, ListInt) | .REQUIRED_ATTR(axes, ListInt) | ||||
.ATTR(keep_dims, Bool, false) | .ATTR(keep_dims, Bool, false) | ||||
.ATTR(noop_with_empty_axes, Bool, true) | |||||
.OP_END_FACTORY_REG(ReduceMeanD) | .OP_END_FACTORY_REG(ReduceMeanD) | ||||
/** | /** | ||||
@@ -983,7 +987,7 @@ REG_OP(EuclideanNormD) | |||||
*@brief Performs instance normalization for inference . \n | *@brief Performs instance normalization for inference . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
* Five inputs, including: (NC1HWC0 supported) | |||||
* Five inputs, including: | |||||
*@li x: A Tensor of type float16 or float32. | *@li x: A Tensor of type float16 or float32. | ||||
*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma. | *@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma. | ||||
*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta. | *@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta. | ||||
@@ -1184,22 +1188,22 @@ REG_OP(GNTrainingReduce) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Seven inputs, including: (NCHW NHWC supported) | * Seven inputs, including: (NCHW NHWC supported) | ||||
*@li x: A Tensor of type float16 or float32. | *@li x: A Tensor of type float16 or float32. | ||||
*@li sum: A 5D Tensor of type float32, | |||||
*@li sum: A tensor of type float32, | |||||
shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC | shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC | ||||
for the output of operator GNTrainingReduce. | for the output of operator GNTrainingReduce. | ||||
*@li square_sum: A 5D Tensor of type float32, | |||||
*@li square_sum: A tensor of type float32, | |||||
shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC | shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC | ||||
for the output of operator GNTrainingReduce. | for the output of operator GNTrainingReduce. | ||||
*@li scale: A 5D Tensor of type float32, | |||||
*@li scale: A tensor of type float32, | |||||
shape is [1, G, 1, 1, 1] for NCHW, [1, 1, 1, G, 1] for NHWC | shape is [1, G, 1, 1, 1] for NCHW, [1, 1, 1, G, 1] for NHWC | ||||
is for the scaling gamma. | is for the scaling gamma. | ||||
*@li offset: A 5D Tensor of type float32, | |||||
*@li offset: A tensor of type float32, | |||||
shape is [1, G, 1, 1, 1] for NCHW, [1, 1, 1, G, 1] for NHWC | shape is [1, G, 1, 1, 1] for NCHW, [1, 1, 1, G, 1] for NHWC | ||||
for the scaling beta. | for the scaling beta. | ||||
*@li mean: A 5D Tensor of type float32, | |||||
*@li mean: A tensor of type float32, | |||||
shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC | shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC | ||||
for the updated mean. | for the updated mean. | ||||
*@li variance: A 5D Tensor of type float32, | |||||
*@li variance: A tensor of type float32, | |||||
shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC | shape is [N, G, 1, 1, 1] for NCHW, [N, 1, 1, G, 1] for NHWC | ||||
for the updated variance. | for the updated variance. | ||||
@@ -1209,7 +1213,7 @@ for the updated variance. | |||||
*@li num_groups: Int, specifying the num of groups. required, same to GNTrainingReduce | *@li num_groups: Int, specifying the num of groups. required, same to GNTrainingReduce | ||||
*@par Outputs: | *@par Outputs: | ||||
* Three outputs, including: (NC1HWC0 supported) | |||||
* Three outputs, including: | |||||
*@li y: A Tensor of type float16 or float32, for normalized "x". | *@li y: A Tensor of type float16 or float32, for normalized "x". | ||||
*@li batch_mean: A Tensor of type float32, for the updated mean. | *@li batch_mean: A Tensor of type float32, for the updated mean. | ||||
*@li batch_variance: A Tensor of type float32, for the updated variance . \n | *@li batch_variance: A Tensor of type float32, for the updated variance . \n | ||||
@@ -1338,7 +1342,7 @@ REG_OP(ReduceStdWithMean) | |||||
*@brief Performs reduced batch normalization . \n | *@brief Performs reduced batch normalization . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . \n | |||||
*x: A tensor of type float16 or float32 . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*@li mean: A Tensor of type float32 for SUM reduced "x". | *@li mean: A Tensor of type float32 for SUM reduced "x". | ||||
@@ -28,10 +28,10 @@ namespace ge { | |||||
*@brief: Basic LSTM Cell forward calculation. | *@brief: Basic LSTM Cell forward calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
*five inputs: | *five inputs: | ||||
*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||||
*@li h:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||||
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
*@li x:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li h:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li w:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND . \n | *@li b:A 1D Tensor. Must be one of the following types: float16. The format must be ND . \n | ||||
*@li mask:A 1D Tensor. Must be one of the following types: uint8. | *@li mask:A 1D Tensor. Must be one of the following types: uint8. | ||||
@@ -75,12 +75,12 @@ REG_OP(BasicLSTMCell) | |||||
*@brief: Dynamic LSTM forward calculation . \n | *@brief: Dynamic LSTM forward calculation . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*@li x:A 4D Tensor. Must be the type float32. The format must be FRACTAL_NZ. | |||||
*@li w:A 4D Tensor. Must be the type float32. The format must be FRACTAL_Z. | |||||
*@li x:A 4D Tensor. Must be the type float32. | |||||
*@li w:A 4D Tensor. Must be the type float32. | |||||
*@li b:A 1D Tensor. Must be the type float32. The format must be ND . \n | *@li b:A 1D Tensor. Must be the type float32. The format must be ND . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*output_h:A Tensor of output. Must be the type float32. The format must be FRACTAL_Z. | |||||
*output_h:A Tensor of output. Must be the type float32. | |||||
*@par Restrictions: | *@par Restrictions: | ||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
@@ -96,27 +96,27 @@ REG_OP(DynamicLSTM) | |||||
*@brief: DynamicRNNGrad calculation. | *@brief: DynamicRNNGrad calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
*ten inputs: \n | *ten inputs: \n | ||||
*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li b:A 1D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li y:A 1D Tensor. Must be one of the following types: int32. The format must be FRACTAL_NZ. | |||||
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li x:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li w:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li b:A 1D Tensor. Must be one of the following types: float16, float32. | |||||
*@li y:A 1D Tensor. Must be one of the following types: int32. | |||||
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li seq_length:A 1D Tensor. Must be one of the following types: int32. | *@li seq_length:A 1D Tensor. Must be one of the following types: int32. | ||||
*@li mask:A 1D Tensor. Must be one of the following types: int8. | *@li mask:A 1D Tensor. Must be one of the following types: int8. | ||||
*@li wci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li wci:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li wco:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. | *@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. | ||||
@@ -131,14 +131,14 @@ REG_OP(DynamicLSTM) | |||||
*@par Outputs: | *@par Outputs: | ||||
*eight outputs: \n | *eight outputs: \n | ||||
*@li dw:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li db:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dwci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dwcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dwco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dw:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li db:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dwci:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dwcf:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dwco:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*/ | */ | ||||
REG_OP(DynamicRNNGrad) | REG_OP(DynamicRNNGrad) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
@@ -185,15 +185,15 @@ REG_OP(DynamicRNNGrad) | |||||
*@brief: DynamicRNN calculation. | *@brief: DynamicRNN calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
*ten inputs: | *ten inputs: | ||||
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | *@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | ||||
*@li seq_length:A optional Tensor. Only Support float16 in FRACTAL_NZ and int32 in ND. | |||||
*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li seq_length:A optional Tensor. Only Support int32 in ND. | |||||
*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. | |||||
*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. | |||||
*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. | |||||
*@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n | *@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -212,14 +212,14 @@ REG_OP(DynamicRNNGrad) | |||||
*@par Outputs: | *@par Outputs: | ||||
*eight outputs: | *eight outputs: | ||||
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@par Third-party framework compatibility: | *@par Third-party framework compatibility: | ||||
* Compatible with the TF operator LSTM. | * Compatible with the TF operator LSTM. | ||||
*/ | */ | ||||
@@ -260,18 +260,16 @@ REG_OP(DynamicRNN) | |||||
*@brief: DynamicRNNV2 calculation. | *@brief: DynamicRNNV2 calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
*ten inputs: | *ten inputs: | ||||
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li weight_input:A required 4D Tensor. Must be one of the following types: float16, float32. | *@li weight_input:A required 4D Tensor. Must be one of the following types: float16, float32. | ||||
*The format must be FRACTAL_Z. | |||||
*@li weight_hidden:A required 4D Tensor. Must be one of the following types: float16, float32. | *@li weight_hidden:A required 4D Tensor. Must be one of the following types: float16, float32. | ||||
*The format must be FRACTAL_Z. | |||||
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | *@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | ||||
*@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND. | *@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND. | ||||
*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. | |||||
*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. | |||||
*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. | |||||
*@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n | *@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -298,16 +296,16 @@ REG_OP(DynamicRNN) | |||||
*@par Outputs: | *@par Outputs: | ||||
*eight outputs: | *eight outputs: | ||||
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*Return the last output_h. | *Return the last output_h. | ||||
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*Return the last output_c. | *Return the last output_c. | ||||
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@par Third-party framework compatibility: | *@par Third-party framework compatibility: | ||||
* Compatible with the TF operator LSTM or TF keras operator LSTM. | * Compatible with the TF operator LSTM or TF keras operator LSTM. | ||||
*/ | */ | ||||
@@ -353,18 +351,18 @@ REG_OP(DynamicRNNV2) | |||||
*@brief: DynamicRNNV3 calculation. | *@brief: DynamicRNNV3 calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
*ten inputs: | *ten inputs: | ||||
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | *@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | ||||
*@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND. | *@li seq_length:A optional 1D Tensor. Must be one of the following types: int32. The format must be ND. | ||||
*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li init_h:A optional 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li init_c:A optional 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li wci:A 4D optional Tensor. Must be one of the following types: float16, float32. | |||||
*@li wcf:A 4D optional Tensor. Must be one of the following types: float16, float32. | |||||
*@li wco:A 4D optional Tensor. Must be one of the following types: float16, float32. | |||||
*@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n | *@li mask:A 1D optional Tensor. Must be one of the following types: uint8. The format must be ND . \n | ||||
*@li real_mask:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li project:A 4D optional Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li real_mask:A 4D optional Tensor. Must be one of the following types: float16, float32. | |||||
*@li project:A 4D optional Tensor. Must be one of the following types: float16, float32. | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. | *@li cell_type:An string identifying the cell type in the op. Default to "LSTM". Only LSTM is currently supported. | ||||
@@ -381,14 +379,14 @@ REG_OP(DynamicRNNV2) | |||||
*@par Outputs: | *@par Outputs: | ||||
*eight outputs: | *eight outputs: | ||||
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@par Third-party framework compatibility: | *@par Third-party framework compatibility: | ||||
* Compatible with the TF operator LSTM. | * Compatible with the TF operator LSTM. | ||||
*/ | */ | ||||
@@ -430,16 +428,16 @@ REG_OP(DynamicRNNV3) | |||||
*@brief: DynamicLSTMV2 calculation. | *@brief: DynamicLSTMV2 calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
*eleven inputs: | *eleven inputs: | ||||
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li x:A required 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li w:A required 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | *@li b:A required 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | ||||
*@li cont:A required 2D Tensor. Must be one of the following types: float16, float32. The format must be ND. | *@li cont:A required 2D Tensor. Must be one of the following types: float16, float32. The format must be ND. | ||||
*@li w_xc_x_static:A optional 2D Tensor. Must be one of the following types: float16, float32. The format must be ND. | *@li w_xc_x_static:A optional 2D Tensor. Must be one of the following types: float16, float32. The format must be ND. | ||||
*@li h0:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li c0:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li wci:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li wcf:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li wco:A optional 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li h0:A optional 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li c0:A optional 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li wci:A optional 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li wcf:A optional 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li wco:A optional 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li mask:A optional 1D Tensor. Must be one of the following types: uint8. The format must be ND . | *@li mask:A optional 1D Tensor. Must be one of the following types: uint8. The format must be ND . | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -450,11 +448,11 @@ REG_OP(DynamicRNNV3) | |||||
*@par Outputs: | *@par Outputs: | ||||
*five outputs: | *five outputs: | ||||
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li last_output_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li last_output_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li output_h:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li output_c:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li last_output_h:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li last_output_c:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@par Third-party framework compatibility: | *@par Third-party framework compatibility: | ||||
* Compatible with the Caffe operator LSTM. | * Compatible with the Caffe operator LSTM. | ||||
*@par Restrictions: | *@par Restrictions: | ||||
@@ -487,25 +485,25 @@ REG_OP(DynamicLSTMV2) | |||||
*@brief: LSTMInputGrad calculation. | *@brief: LSTMInputGrad calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
*eleven inputs: \n | *eleven inputs: \n | ||||
*@li w:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li w:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@par Outputs: | *@par Outputs: | ||||
*four outputs: \n | *four outputs: \n | ||||
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dgate:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||||
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dgate:A 4D Tensor. Must be one of the following types: float16. | |||||
*/ | */ | ||||
REG_OP(LSTMInputGrad) | REG_OP(LSTMInputGrad) | ||||
.INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(w, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
@@ -531,18 +529,18 @@ REG_OP(LSTMInputGrad) | |||||
*@brief: Dynamic LSTM Cell grad calculation.Calculate the gradient of gates and cell state. | *@brief: Dynamic LSTM Cell grad calculation.Calculate the gradient of gates and cell state. | ||||
*@par Inputs: | *@par Inputs: | ||||
*twelve inputs: | *twelve inputs: | ||||
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li mask:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li t_state:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ . \n | |||||
*@li init_c:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dc:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li i:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li j:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li f:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li mask:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li t_state:A 4D Tensor. Must be one of the following types: float16, float32. \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li forget_bias:An integer identifying the forget bias in the op. Default to 1. | *@li forget_bias:An integer identifying the forget bias in the op. Default to 1. | ||||
@@ -584,8 +582,8 @@ REG_OP(DynamicLSTMGradCell) | |||||
*@brief: Basic LSTM Cell backward calculation.Calculate the gradient of input and hidden state. | *@brief: Basic LSTM Cell backward calculation.Calculate the gradient of input and hidden state. | ||||
*@par Inputs: | *@par Inputs: | ||||
*three inputs: | *three inputs: | ||||
*@li dgate:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||||
*@li w:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
*@li dgate:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li w:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li dropout_mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n | *@li dropout_mask:A 1D Tensor. Must be one of the following types: uint8. The format must be ND . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -612,9 +610,9 @@ REG_OP(BasicLSTMCellInputGrad) | |||||
*@brief: Basic LSTM Cell backward calculation.Calculate the gradient of weight and bias. | *@brief: Basic LSTM Cell backward calculation.Calculate the gradient of weight and bias. | ||||
*@par Inputs: | *@par Inputs: | ||||
*three inputs: | *three inputs: | ||||
*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||||
*@li h:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||||
*@li dgate:A 4D Tensor. Must be one of the following types: uint8. The format must be FRACTAL_NZ . \n | |||||
*@li x:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li h:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li dgate:A 4D Tensor. Must be one of the following types: uint8. \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*two outputs: | *two outputs: | ||||
@@ -636,14 +634,14 @@ REG_OP(BasicLSTMCellWeightGrad) | |||||
*@brief: Basic LSTM Cell backward calculation.Calculate the gradient of gates and cell state. | *@brief: Basic LSTM Cell backward calculation.Calculate the gradient of gates and cell state. | ||||
*@par Inputs: | *@par Inputs: | ||||
*eight inputs: | *eight inputs: | ||||
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dht:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li it:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li jt:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li ft:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li ot:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ . \n | |||||
*@li c:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dht:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dct:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li it:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li jt:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li ft:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li ot:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li forget_bias:An integer identifying the forget bias in the op. Default to 1. | *@li forget_bias:An integer identifying the forget bias in the op. Default to 1. | ||||
@@ -676,14 +674,14 @@ REG_OP(BasicLSTMCellCStateGrad) | |||||
*@brief: RNN operator. | *@brief: RNN operator. | ||||
*@par Inputs: | *@par Inputs: | ||||
*eight inputs: | *eight inputs: | ||||
*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||||
*@li x:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li cont:A 1D Tensor. Must be one of the following types: float16. The format must be ND. | *@li cont:A 1D Tensor. Must be one of the following types: float16. The format must be ND. | ||||
*@li x_static:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||||
*@li h_0:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li w_xh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
*@li w_sh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
*@li w_hh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
*@li w_ho:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
*@li x_static:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li h_0:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li w_xh:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li w_sh:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li w_hh:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li w_ho:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li bias_h:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | *@li bias_h:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | ||||
*@li bias_o:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND . \n | *@li bias_o:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND . \n | ||||
@@ -693,8 +691,8 @@ REG_OP(BasicLSTMCellCStateGrad) | |||||
*@par Outputs: | *@par Outputs: | ||||
*two outputs: | *two outputs: | ||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li o:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@par Restrictions: | *@par Restrictions: | ||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
*/ | */ | ||||
@@ -719,13 +717,13 @@ REG_OP(RNN) | |||||
*@brief: BasicRNNCell operator. | *@brief: BasicRNNCell operator. | ||||
*@par Inputs: | *@par Inputs: | ||||
*eight inputs: | *eight inputs: | ||||
*@li x:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||||
*@li x:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li cont:A 1D Tensor. Must be one of the following types: float16. The format must be ND. | *@li cont:A 1D Tensor. Must be one of the following types: float16. The format must be ND. | ||||
*@li w_xh_x_static:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||||
*@li h_0:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li w_xh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
*@li w_hh:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
*@li w_ho:A 4D Tensor. Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
*@li w_xh_x_static:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li h_0:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li w_xh:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li w_hh:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li w_ho:A 4D Tensor. Must be one of the following types: float16. | |||||
*@li bias_h:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | *@li bias_h:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | ||||
*@li bias_o:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND . \n | *@li bias_o:A 1D Tensor. Must be one of the following types: float16, float32. The format must be ND . \n | ||||
@@ -735,8 +733,8 @@ REG_OP(RNN) | |||||
*@par Outputs: | *@par Outputs: | ||||
*two outputs: | *two outputs: | ||||
*@li o_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li o_t:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li h_t:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@par Restrictions: | *@par Restrictions: | ||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
@@ -761,13 +759,13 @@ REG_OP(BasicRNNCell) | |||||
*@brief DynamicGRU calculation. | *@brief DynamicGRU calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
*seven inputs: | *seven inputs: | ||||
*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||||
*@li w:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
*@li x:Must be one of the following types: float16. | |||||
*@li w:Must be one of the following types: float16. | |||||
*@li b:Must be one of the following types: float16, float32. The format must be ND. | *@li b:Must be one of the following types: float16, float32. The format must be ND. | ||||
*@li cw:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
*@li cw:Must be one of the following types: float16. | |||||
*@li cb:Must be one of the following types: float16, float32. The format must be ND. | *@li cb:Must be one of the following types: float16, float32. The format must be ND. | ||||
*@li seq_length:Must be one of the following types: int32. The format must be ND. | *@li seq_length:Must be one of the following types: int32. The format must be ND. | ||||
*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li init_h:Must be one of the following types: float16, float32. | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li direction:A string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. | *@li direction:A string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. | ||||
@@ -781,11 +779,11 @@ REG_OP(BasicRNNCell) | |||||
*@par Outputs: | *@par Outputs: | ||||
*five outputs: | *five outputs: | ||||
*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li r:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li i:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li n:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li y:Must be one of the following types: float16, float32. | |||||
*@li output_h:Must be one of the following types: float16, float32. | |||||
*@li r:Must be one of the following types: float16, float32. | |||||
*@li i:Must be one of the following types: float16, float32. | |||||
*@li n:Must be one of the following types: float16, float32. | |||||
*@par Restrictions: | *@par Restrictions: | ||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
@@ -817,13 +815,13 @@ REG_OP(DynamicGRU) | |||||
*@brief DynamicGRUV2 calculation. | *@brief DynamicGRUV2 calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
*seven inputs: | *seven inputs: | ||||
*@li x:Must be one of the following types: float16. The format must be FRACTAL_NZ. | |||||
*@li weight_input:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
*@li x:Must be one of the following types: float16. | |||||
*@li weight_input:Must be one of the following types: float16. | |||||
*@li weight_hidden:Must be one of the following types: float16. | |||||
*@li bias_input:Must be one of the following types: float16, float32. The format must be ND. | *@li bias_input:Must be one of the following types: float16, float32. The format must be ND. | ||||
*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. | *@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. | ||||
*@li seq_length:Must be one of the following types: float16 in FRACTAL_NZ and int32 in ND. | |||||
*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li seq_length:Must be one of the following types: int32. The format must be ND. | |||||
*@li init_h:Must be one of the following types: float16, float32. | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li direction:A string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. | *@li direction:A string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. | ||||
@@ -839,12 +837,12 @@ REG_OP(DynamicGRU) | |||||
*@par Outputs: | *@par Outputs: | ||||
*six outputs: | *six outputs: | ||||
*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li y:Must be one of the following types: float16, float32. | |||||
*@li output_h:Must be one of the following types: float16, float32. | |||||
*@li update:Must be one of the following types: float16, float32. | |||||
*@li reset:Must be one of the following types: float16, float32. | |||||
*@li new:Must be one of the following types: float16, float32. | |||||
*@li hidden_new:Must be one of the following types: float16, float32. | |||||
*/ | */ | ||||
REG_OP(DynamicGRUV2) | REG_OP(DynamicGRUV2) | ||||
.INPUT(x, TensorType({DT_FLOAT16})) | .INPUT(x, TensorType({DT_FLOAT16})) | ||||
@@ -877,11 +875,11 @@ REG_OP(DynamicGRUV2) | |||||
*@brief DynamicGRUV2Hidden calculation. | *@brief DynamicGRUV2Hidden calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
*five inputs: | *five inputs: | ||||
*@li x_weight_input:Must be one of the following types: float32. The format must be FRACTAL_NZ. | |||||
*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z. | |||||
*@li x_weight_input:Must be one of the following types: float32. | |||||
*@li weight_hidden:Must be one of the following types: float16. | |||||
*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. | *@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND. | ||||
*@li seq_length:Must be one of the following types: float16 in FRACTAL_NZ and int32 in ND. | |||||
*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li seq_length:Must be one of the following types: int32. The format must be ND. | |||||
*@li init_h:Must be one of the following types: float16, float32. | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". | *@li direction:An string identifying the direction in the op. Default to "UNIDIRECTIONAL". | ||||
@@ -899,12 +897,12 @@ Only tanh is currently supported. | |||||
*@par Outputs: | *@par Outputs: | ||||
*six outputs: | *six outputs: | ||||
*@li y:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li output_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li update:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li reset:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li hidden_new:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li y:Must be one of the following types: float16, float32. | |||||
*@li output_h:Must be one of the following types: float16, float32. | |||||
*@li update:Must be one of the following types: float16, float32. | |||||
*@li reset:Must be one of the following types: float16, float32. | |||||
*@li new:Must be one of the following types: float16, float32. | |||||
*@li hidden_new:Must be one of the following types: float16, float32. | |||||
*@par Restrictions: | *@par Restrictions: | ||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
@@ -938,20 +936,20 @@ REG_OP(DynamicGRUV2Hidden) | |||||
*@brief: DynamicGRUV2Grad calculation. | *@brief: DynamicGRUV2Grad calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
*fourteen inputs: \n | *fourteen inputs: \n | ||||
*@li x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li weight_input:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li seq_length:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li mask:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li x:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li weight_input:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li weight_hidden:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li y:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li update:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li new:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li seq_length:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li mask:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li direction:A string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. | *@li direction:A string identifying the direction in the op. Default to "UNIDIRECTIONAL". Only UNIDIRECTIONAL is currently supported. | ||||
@@ -965,12 +963,12 @@ REG_OP(DynamicGRUV2Hidden) | |||||
*@par Outputs: | *@par Outputs: | ||||
*six outputs: \n | *six outputs: \n | ||||
*@li dw_input:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dw_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li db_input:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dw_input:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dw_hidden:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li db_input:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li db_hidden:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@par Restrictions: | *@par Restrictions: | ||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
@@ -1010,15 +1008,15 @@ REG_OP(DynamicGRUV2Grad) | |||||
*@brief: GRUV2HiddenGrad calculation. | *@brief: GRUV2HiddenGrad calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
*nine inputs: \n | *nine inputs: \n | ||||
*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li update:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li new:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li t_state:An Int identifying the current t state. Default to [0, 4]. | *@li t_state:An Int identifying the current t state. Default to [0, 4]. | ||||
@@ -1026,9 +1024,9 @@ REG_OP(DynamicGRUV2Grad) | |||||
*@par Outputs: | *@par Outputs: | ||||
*three outputs: \n | *three outputs: \n | ||||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@par Restrictions: | *@par Restrictions: | ||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
@@ -1053,15 +1051,15 @@ REG_OP(GRUV2HiddenGradCell) | |||||
*@brief: DynamicGRUCellGrad calculation. | *@brief: DynamicGRUCellGrad calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
*ten inputs: \n | *ten inputs: \n | ||||
*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.+ | |||||
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li update:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li new:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li t_state:A 1D Tensor. Must be one of the following types: int32. The format must be ND. | *@li t_state:A 1D Tensor. Must be one of the following types: int32. The format must be ND. | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -1069,9 +1067,9 @@ REG_OP(GRUV2HiddenGradCell) | |||||
*@par Outputs: | *@par Outputs: | ||||
*three outputs: \n | *three outputs: \n | ||||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@par Restrictions: | *@par Restrictions: | ||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
@@ -1130,13 +1128,13 @@ REG_OP(EmbeddingDenseGrad) | |||||
*@brief CommonLSTM calculation. | *@brief CommonLSTM calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
*eight inputs: \n | *eight inputs: \n | ||||
*@li x:Each time step is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li w:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li r:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_ZN_LSTM. | |||||
*@li x:Each time step is a 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li w:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li r:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li b:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | *@li b:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | ||||
*@li sequence_lens:An optional input. A 1D Tensor. Must be one of the following types: int32. The format must be ND. | *@li sequence_lens:An optional input. A 1D Tensor. Must be one of the following types: int32. The format must be ND. | ||||
*@li initial_h:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li initial_c:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li initial_h:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li initial_c:An optional input. Each direction is a 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li p:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | *@li p:An optional input. Each direction is a 1D Tensor. Must be one of the following types: float16, float32. The format must be ND. | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -1150,9 +1148,9 @@ REG_OP(EmbeddingDenseGrad) | |||||
*@par Outputs: | *@par Outputs: | ||||
*three outputs: \n | *three outputs: \n | ||||
*@li y:First dimension is time step, second dimension is direction, others is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li y_h:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li y_c:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li y:First dimension is time step, second dimension is direction, others is a 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li y_h:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. | |||||
*@li y_c:Each direction is a 4D Tensor. Must be one of the following types: float16, float32. | |||||
*/ | */ | ||||
REG_OP(CommonLSTM) | REG_OP(CommonLSTM) | ||||
@@ -1202,12 +1200,12 @@ REG_OP(RnnGenMaskV2) | |||||
* @par Inputs: | * @par Inputs: | ||||
* Eight inputs, including: | * Eight inputs, including: | ||||
* @li x: The input sequences packed (and pontentially padded) into on 3D Tesnor(float16). The format must be FRACTAL_NZ | |||||
* @li w: The weight tensor for the gates is 3D Tensor(float16). The format must be FRACTAL_Z | |||||
* @li r: The recurrence weight tesnor is 3D Tensor(float16). The format must be FRACTAL_Z | |||||
* @li x: The input sequences packed (and potentially padded) into one 3D Tensor(float16). | |||||
* @li w: The weight tensor for the gates is 3D Tensor(float16). | |||||
* @li r: The recurrence weight tensor is 3D Tensor(float16). | |||||
* @li b: The bias tensor for the gates. The format must be ND | * @li b: The bias tensor for the gates. The format must be ND | ||||
* @li sequence_lens: Optional tensor specifying lengths of sequences(int32). The format must be ND | * @li sequence_lens: Optional tensor specifying lengths of sequences(int32). The format must be ND | ||||
* @li init_h: Optional initial value of the hidden(float16,float32). The format must be FRACTAL_NZ | |||||
* @li init_h: Optional initial value of the hidden(float16,float32). | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li activation_alpha: Optional scaling values used by some activation functions. \n | * @li activation_alpha: Optional scaling values used by some activation functions. \n | ||||
@@ -1219,8 +1217,8 @@ REG_OP(RnnGenMaskV2) | |||||
* @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. \n | * @li linear_before_reset: When computing the output of the hidden gate, apply the linear transformation before multiplying by the output of the reset gate. \n | ||||
* @par Outputs: | * @par Outputs: | ||||
* @li y: A Tensor that concats all the intermediate output values of the hidden(float16,float32). The format must be FRACTAL_NZ | |||||
* @li y_h: The last output value of the hidden(float16,float32). The format must be FRACTAL_NZ | |||||
* @li y: A Tensor that concats all the intermediate output values of the hidden(float16,float32). | |||||
* @li y_h: The last output value of the hidden(float16,float32). | |||||
*/ | */ | ||||
REG_OP(CommonGRU) | REG_OP(CommonGRU) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
@@ -1251,17 +1249,17 @@ REG_OP(CommonGRU) | |||||
* @li per_sample_weights: to indicate all weights should be taken to be 1. | * @li per_sample_weights: to indicate all weights should be taken to be 1. | ||||
* If specified, per_sample_weights must have exactly the same shape as input | * If specified, per_sample_weights must have exactly the same shape as input | ||||
* and is treated as having the same offsets, if those are not None. | * and is treated as having the same offsets, if those are not None. | ||||
* Only supported for mode='sum'..\n | |||||
* Only supported for mode='sum'.\n | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li mode: An string attr which use "sum"``, ``"mean"`` or ``"max"``. Specifies the way to reduce the bag.. \n | |||||
* @li mode: A string attr which uses "sum", "mean" or "max". Specifies the way to reduce the bag. \n | |||||
* @li scale_grad_by_freq: An optional bool. Defaults to "False". | * @li scale_grad_by_freq: An optional bool. Defaults to "False". | ||||
* If "True", "grad_weight" will be scale by word_frequency. | * If "True", "grad_weight" will be scale by word_frequency. | ||||
* If "False", "grad_weight" will not be scale by word_frequency. \n | * If "False", "grad_weight" will not be scale by word_frequency. \n | ||||
* @li sparse: if True, gradient w.r.t.attr weight matrix will be a sparse tensor. \n | * @li sparse: if True, gradient w.r.t.attr weight matrix will be a sparse tensor. \n | ||||
* @li include_last_offset: if True, attr offsets has one additional element, where the last element | * @li include_last_offset: if True, attr offsets has one additional element, where the last element | ||||
* is equivalent to the size of indices. This matches the CSR format.. \n | |||||
* is equivalent to the size of indices. This matches the CSR format. \n | |||||
* @par Outputs: | * @par Outputs: | ||||
* y: A mutable output Tensor of new word grad has the same type as "grads". \n | * y: A mutable output Tensor of new word grad has the same type as "grads". \n | ||||
@@ -188,7 +188,7 @@ REG_OP(ParallelConcat) | |||||
*@par Inputs: | *@par Inputs: | ||||
* One input: | * One input: | ||||
*x: Dynamic input.An NC1HWC0 or ND Tensor. | |||||
*x: Dynamic input. An ND Tensor. | |||||
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -219,7 +219,7 @@ REG_OP(ConcatV2D) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Two inputs, including: | * Two inputs, including: | ||||
*@li Dynamic input "x" is An NC1HWC0 or ND Tensor. | |||||
*@li Dynamic input "x" is an ND Tensor. | |||||
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | ||||
*@li concat_dim: An int32, or int64. Specifies the dimension along which to concatenate . \n | *@li concat_dim: An int32, or int64. Specifies the dimension along which to concatenate . \n | ||||
@@ -247,7 +247,7 @@ REG_OP(ConcatV2) | |||||
*@par Inputs: | *@par Inputs: | ||||
* One input: | * One input: | ||||
*x:Dynamic input. An NC1HWC0 or ND Tensor. | |||||
*x:Dynamic input. An ND Tensor. | |||||
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -278,7 +278,7 @@ REG_OP(ConcatD) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Two inputs, including: | * Two inputs, including: | ||||
*@li x: Dynamic input.An NC1HWC0 or ND Tensor. | |||||
*@li x: Dynamic input. An ND Tensor. | |||||
*Must be one of the following types: float16, float32, double, int32, | *Must be one of the following types: float16, float32, double, int32, | ||||
* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, | * uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, | ||||
* complex128, uint32, uint64, qint16, quint16. | * complex128, uint32, uint64, qint16, quint16. | ||||
@@ -39,7 +39,7 @@ namespace ge { | |||||
*input_format: A required string, specifying the input format. \n | *input_format: A required string, specifying the input format. \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: The output tensor of type uint8, format only support NC1HWC0_C04. | |||||
*y: The output tensor of type uint8. | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* It is a custom operator. It has no corresponding operator in Caffe. | * It is a custom operator. It has no corresponding operator in Caffe. | ||||
* | * | ||||
@@ -138,9 +138,11 @@ REG_OP(Transpose) | |||||
* For branches without padding also can be types: int16, int64, uint8, uint16, uint32, uint64 . \n | * For branches without padding also can be types: int16, int64, uint8, uint16, uint32, uint64 . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Z" etc. | |||||
*@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Z" etc. | |||||
*@li group: A optional int32, default value is 1. \n | |||||
*@li src_format: A string source data format, can be "NHWC", "NCHW" etc. | |||||
*@li dst_format: A string target data format, can be "NCHW" etc. | |||||
*@li src_subformat: A optional int32 for source sub-format, default value is 0. | |||||
*@li dst_subformat: A optional int32 for target sub-format, default value is 0. | |||||
*@li groups: A optional int32, default value is 1. \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*dst: A Tensor. Has the same type as "src". | *dst: A Tensor. Has the same type as "src". | ||||
@@ -150,6 +152,8 @@ REG_OP(TransData) | |||||
.OUTPUT(dst, TensorType::BasicType()) | .OUTPUT(dst, TensorType::BasicType()) | ||||
.REQUIRED_ATTR(src_format, String) | .REQUIRED_ATTR(src_format, String) | ||||
.REQUIRED_ATTR(dst_format, String) | .REQUIRED_ATTR(dst_format, String) | ||||
.ATTR(src_subformat, Int, 0) | |||||
.ATTR(dst_subformat, Int, 0) | |||||
.ATTR(groups, Int, 1) | .ATTR(groups, Int, 1) | ||||
.OP_END_FACTORY_REG(TransData) | .OP_END_FACTORY_REG(TransData) | ||||
@@ -236,13 +240,13 @@ REG_OP(Flatten) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Three inputs, including: | * Three inputs, including: | ||||
*@li x: A 5D Tensor of type float16 or int8 or uint8, with format NC1HWC0. | |||||
*@li x: A 5D Tensor of type float16 or int8 or uint8. | |||||
*@li block_shape: A 1D list or tuple of int32 or int64. | *@li block_shape: A 1D list or tuple of int32 or int64. | ||||
*@li crops: A 2D list or tuple of int32 or int64. Specifies the amount to | *@li crops: A 2D list or tuple of int32 or int64. Specifies the amount to | ||||
*crop from start and end dimensions after permutation . \n | *crop from start and end dimensions after permutation . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor with format NC1HWC0. Has the same type as input "x" . \n | |||||
*y: A Tensor. Has the same type as input "x" . \n | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator BatchToSpaceND. | * Compatible with the TensorFlow operator BatchToSpaceND. | ||||
@@ -259,7 +263,7 @@ REG_OP(BatchToSpaceND) | |||||
*@par Inputs: | *@par Inputs: | ||||
* One input: | * One input: | ||||
*x: A 5D Tensor of type float16 or int8 or uint8, with format NC1HWC0 . \n | |||||
*x: A 5D Tensor of type float16 or int8 or uint8. \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li block_shape: A required 1D list or tuple of int32 or int64. | *@li block_shape: A required 1D list or tuple of int32 or int64. | ||||
@@ -267,7 +271,7 @@ REG_OP(BatchToSpaceND) | |||||
* from the start and end dimensions after permutation . \n | * from the start and end dimensions after permutation . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor with format NC1HWC0. Has the same type as input "x". | |||||
*y: A Tensor. Has the same type as input "x". | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
@@ -288,12 +292,12 @@ REG_OP(BatchToSpaceNDD) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Three inputs, including: | * Three inputs, including: | ||||
*@li x: A 5D Tensor of type float16 or float32, with format NC1HWC0. | |||||
*@li x: A 5D Tensor of type float16 or float32. | |||||
*@li block_shape: A 1D list or tuple of int32 or int64. | *@li block_shape: A 1D list or tuple of int32 or int64. | ||||
*@li paddings: A 2D list or tuple of int32 or int64. Specifies the padding for the start and end dimensions after permutation . \n | *@li paddings: A 2D list or tuple of int32 or int64. Specifies the padding for the start and end dimensions after permutation . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor with format NC1HWC0. Has the same type as input "x" . \n | |||||
*y: A Tensor. Has the same type as input "x" . \n | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator SpaceToBatchND. | * Compatible with the TensorFlow operator SpaceToBatchND. | ||||
@@ -310,14 +314,14 @@ REG_OP(SpaceToBatchND) | |||||
*@par Inputs: | *@par Inputs: | ||||
* One input: | * One input: | ||||
*x: A 5D Tensor of type float16 or float32, with format NC1HWC0 . \n | |||||
*x: A 5D Tensor of type float16 or float32. \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li block_shape: A required 1D list or tuple of int32 or int64. | *@li block_shape: A required 1D list or tuple of int32 or int64. | ||||
*@li paddings: A required 2D list or tuple of int32 or int64. Specifies the padding for the start and end dimensions after permutation . \n | *@li paddings: A required 2D list or tuple of int32 or int64. Specifies the padding for the start and end dimensions after permutation . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor with format NC1HWC0. Has the same type as input "x" . \n | |||||
*y: A Tensor. Has the same type as input "x" . \n | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator SpaceToBatchND. | * Compatible with the TensorFlow operator SpaceToBatchND. | ||||
@@ -516,7 +520,7 @@ REG_OP(SpaceToBatchD) | |||||
* tensors . \n | * tensors . \n | ||||
* @par Inputs: | * @par Inputs: | ||||
* x: A rank-R tensor (R > 0) of type BasicType, with format ND or NC1HWC0 . \n | |||||
* x: A rank-R tensor (R > 0) of type BasicType. \n | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li num: A required int, specifying the number of tensors to be unpacked to. | * @li num: A required int, specifying the number of tensors to be unpacked to. | ||||
@@ -529,8 +533,7 @@ REG_OP(SpaceToBatchD) | |||||
* @attention Constraints: | * @attention Constraints: | ||||
* @li If "num" is not specified, it is inferred from the shape of "x". | * @li If "num" is not specified, it is inferred from the shape of "x". | ||||
* @li For the ND format, "axis" is in the range [-R, R); For the NC1HWC0 format, | |||||
* "axis" must not be 2, 3, -2, or -3 . \n | |||||
* @li For the ND format, "axis" is in the range [-R, R). \n | |||||
* @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator Unpack. | * Compatible with the TensorFlow operator Unpack. | ||||
@@ -140,6 +140,16 @@ typedef void *rtModel_t; | |||||
#define RT_PROF_MAX_DEV_NUM 64 | #define RT_PROF_MAX_DEV_NUM 64 | ||||
#define PATH_LEN_MAX 1023 | |||||
#define PARAM_LEN_MAX 4095 | |||||
typedef struct rtCommandHandleParams { | |||||
uint32_t pathLen; | |||||
uint32_t storageLimit; // MB | |||||
uint32_t profDataLen; | |||||
char_t path[PATH_LEN_MAX + 1]; | |||||
char_t profData[PARAM_LEN_MAX + 1]; | |||||
} rtCommandHandleParams_t; | |||||
/** | /** | ||||
* @ingroup profiling_base | * @ingroup profiling_base | ||||
* @brief profiling command info | * @brief profiling command info | ||||
@@ -151,6 +161,7 @@ typedef struct rtProfCommandHandle { | |||||
uint32_t devIdList[RT_PROF_MAX_DEV_NUM]; | uint32_t devIdList[RT_PROF_MAX_DEV_NUM]; | ||||
uint32_t modelId; | uint32_t modelId; | ||||
uint32_t type; | uint32_t type; | ||||
rtCommandHandleParams_t commandHandleParams; | |||||
} rtProfCommandHandle_t; | } rtProfCommandHandle_t; | ||||
/** | /** | ||||
@@ -131,6 +131,32 @@ typedef struct tagRtArgsWithTiling { | |||||
} rtArgsWithTiling_t; | } rtArgsWithTiling_t; | ||||
/** | /** | ||||
* @ingroup rt_kernel | |||||
* @brief host memory input struct | |||||
*/ | |||||
typedef struct rtHostInputInfo { | |||||
uint16_t addrOffset; | |||||
uint16_t dataOffset; | |||||
} rtHostInputInfo_t; | |||||
/** | |||||
* @ingroup rt_kernel | |||||
* @brief args struct | |||||
*/ | |||||
typedef struct tagRtArgsEx { | |||||
void *args; // args host mem addr | |||||
rtHostInputInfo_t *hostInputInfoPtr; // nullptr means no host mem input | |||||
uint32_t argsSize; // input + output + tiling addr size + tiling data size + host mem | |||||
uint16_t tilingAddrOffset; // tiling addr offset | |||||
uint16_t tilingDataOffset; // tiling data offset | |||||
uint16_t hostInputInfoNum; // hostInputInfo num | |||||
uint8_t hasTiling; // if has tiling: 0 means no tiling | |||||
uint8_t isNoNeedH2DCopy; // is no need host to device copy: 0 means need H2D copy, | |||||
// others means doesn't need H2D copy. | |||||
uint8_t reserved[4]; | |||||
} rtArgsEx_t; | |||||
/** | |||||
* @ingroup rt_KernelConfigDump | * @ingroup rt_KernelConfigDump | ||||
* @brief device dump type | * @brief device dump type | ||||
*/ | */ | ||||
@@ -375,39 +401,70 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * | |||||
/** | /** | ||||
* @ingroup rt_kernel | * @ingroup rt_kernel | ||||
* @brief launch kernel with handle to device | * @brief launch kernel with handle to device | ||||
* @param [in] hdl program | |||||
* @param [in] hdl program | |||||
* @param [in] kernelInfoExt kernel Info extension. device function description or tiling key, | * @param [in] kernelInfoExt kernel Info extension. device function description or tiling key, | ||||
* depending static shape or dynmaic shape. | * depending static shape or dynmaic shape. | ||||
* @param [in] blockDim block dimentions | |||||
* @param [in] args argments address for kernel function | |||||
* @param [in] argsSize argements size | |||||
* @param [in] smDesc shared memory description | |||||
* @param [in] stm associated stream | |||||
* @param [in] kernelInfo kernel info | |||||
* @param [in] blockDim block dimentions | |||||
* @param [in] argsInfo argments address for kernel function | |||||
* @param [in] smDesc shared memory description | |||||
* @param [in] stm associated stream | |||||
* @param [in] kernelInfo kernel info | |||||
* @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
* @return RT_ERROR_INVALID_VALUE for error input | * @return RT_ERROR_INVALID_VALUE for error input | ||||
*/ | */ | ||||
RTS_API rtError_t rtKernelLaunchWithHandle(void *hdl, const void *kernelInfoExt, uint32_t blockDim, | RTS_API rtError_t rtKernelLaunchWithHandle(void *hdl, const void *kernelInfoExt, uint32_t blockDim, | ||||
void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream_, | |||||
rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm, | |||||
const void *kernelInfo); | const void *kernelInfo); | ||||
/** | /** | ||||
* @ingroup rt_kernel | |||||
* @ingroup rtKernelLaunchWithHandleV2 | |||||
* @brief launch kernel with handle to device | |||||
* @param [in] hdl program | |||||
* @param [in] kernelInfoExt kernel Info extension. device function description or tiling key, | |||||
* depending static shape or dynmaic shape. | |||||
* @param [in] blockDim block dimentions | |||||
* @param [in] argsInfo argments address for kernel function | |||||
* @param [in] smDesc shared memory description | |||||
* @param [in] stm associated stream | |||||
* @param [in] kernelInfo kernel info | |||||
* @return RT_ERROR_NONE for ok | |||||
* @return RT_ERROR_INVALID_VALUE for error input | |||||
*/ | |||||
RTS_API rtError_t rtKernelLaunchWithHandleV2(void *hdl, const void *kernelInfoExt, uint32_t blockDim, | |||||
rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm, | |||||
const void *kernelInfo); | |||||
/** | |||||
* @ingroup rtKernelLaunchWithFlag | |||||
* @brief launch kernel to device | * @brief launch kernel to device | ||||
* @param [in] stubFunc stub function | * @param [in] stubFunc stub function | ||||
* @param [in] blockDim block dimentions | * @param [in] blockDim block dimentions | ||||
* @param [in] args argments address for kernel function | |||||
* @param [in] argsSize argements size | |||||
* @param [in] smDesc shared memory description | |||||
* @param [in] stm associated stream | |||||
* @param [in] flag dump flag | |||||
* @param [in] argsInfo argments address for kernel function | |||||
* @param [in] smDesc shared memory description | |||||
* @param [in] stm associated stream | |||||
* @param [in] flags dump flag | |||||
* @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
* @return RT_ERROR_INVALID_VALUE for error input | * @return RT_ERROR_INVALID_VALUE for error input | ||||
*/ | */ | ||||
RTS_API rtError_t rtKernelLaunchWithFlag(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, | |||||
RTS_API rtError_t rtKernelLaunchWithFlag(const void *stubFunc, uint32_t blockDim, rtArgsEx_t *argsInfo, | |||||
rtSmDesc_t *smDesc, rtStream_t stm, uint32_t flags); | rtSmDesc_t *smDesc, rtStream_t stm, uint32_t flags); | ||||
/** | /** | ||||
* @ingroup rtKernelLaunchWithFlagV2 | |||||
* @brief launch kernel to device | |||||
* @param [in] stubFunc stub function | |||||
* @param [in] blockDim block dimentions | |||||
* @param [in] argsInfo argments address for kernel function | |||||
* @param [in] smDesc shared memory description | |||||
* @param [in] stm associated stream | |||||
* @param [in] flags dump flag | |||||
* @return RT_ERROR_NONE for ok | |||||
* @return RT_ERROR_INVALID_VALUE for error input | |||||
*/ | |||||
RTS_API rtError_t rtKernelLaunchWithFlagV2(const void *stubFunc, uint32_t blockDim, rtArgsEx_t *argsInfo, | |||||
rtSmDesc_t *smDesc, rtStream_t stm, uint32_t flags); | |||||
/** | |||||
* @ingroup rt_kernel(abandoned) | * @ingroup rt_kernel(abandoned) | ||||
* @brief launch kernel to device | * @brief launch kernel to device | ||||
* @param [in] args argments address for kernel function | * @param [in] args argments address for kernel function | ||||
@@ -465,38 +522,70 @@ RTS_API rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, | |||||
uint32_t blockDim, const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stm); | uint32_t blockDim, const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stm); | ||||
/** | /** | ||||
* @ingroup rt_kernel(abandoned) | |||||
* @ingroup rtCpuKernelLaunchWithFlag(abandoned) | |||||
* @brief launch cpu kernel to device with dump identifier | * @brief launch cpu kernel to device with dump identifier | ||||
* @param [in] soName so name | * @param [in] soName so name | ||||
* @param [in] kernelName kernel name | * @param [in] kernelName kernel name | ||||
* @param [in] blockDim block dimentions | * @param [in] blockDim block dimentions | ||||
* @param [in] args argments address for kernel function | |||||
* @param [in] argsSize argments size | |||||
* @param [in] argsInfo argments address for kernel function | |||||
* @param [in] smDesc shared memory description | * @param [in] smDesc shared memory description | ||||
* @param [in] stm associated stream | |||||
* @param [in] stm associated stream | |||||
* @param [in] flag dump flag or others function flag | * @param [in] flag dump flag or others function flag | ||||
* @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
* @return RT_ERROR_INVALID_VALUE for error input | * @return RT_ERROR_INVALID_VALUE for error input | ||||
*/ | */ | ||||
RTS_API rtError_t rtCpuKernelLaunchWithFlag(const void *soName, const void *kernelName, uint32_t blockDim, | RTS_API rtError_t rtCpuKernelLaunchWithFlag(const void *soName, const void *kernelName, uint32_t blockDim, | ||||
const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stm, | |||||
const rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm, | |||||
uint32_t flags); | uint32_t flags); | ||||
/** | /** | ||||
* @ingroup rt_kernel(in use) | |||||
* @ingroup rtCpuKernelLaunchWithFlagV2 | |||||
* @brief launch cpu kernel to device with dump identifier | |||||
* @param [in] soName so name | |||||
* @param [in] kernelName kernel name | |||||
* @param [in] blockDim block dimentions | |||||
* @param [in] argsInfo argments address for kernel function | |||||
* @param [in] smDesc shared memory description | |||||
* @param [in] stm associated stream | |||||
* @param [in] flags dump flag or others function flag | |||||
* @return RT_ERROR_NONE for ok | |||||
* @return RT_ERROR_INVALID_VALUE for error input | |||||
*/ | |||||
RTS_API rtError_t rtCpuKernelLaunchWithFlagV2(const void *soName, const void *kernelName, uint32_t blockDim, | |||||
const rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm, | |||||
uint32_t flags); | |||||
/** | |||||
* @ingroup rtAicpuKernelLaunchWithFlag(in use) | |||||
* @brief launch cpu kernel to device with dump identifier | * @brief launch cpu kernel to device with dump identifier | ||||
* @param [in] launchNames names for kernel launch | * @param [in] launchNames names for kernel launch | ||||
* @param [in] blockDim block dimentions | * @param [in] blockDim block dimentions | ||||
* @param [in] args argments address for kernel function | * @param [in] args argments address for kernel function | ||||
* @param [in] argsSize argments size | |||||
* @param [in] smDesc shared memory description | * @param [in] smDesc shared memory description | ||||
* @param [in] stm associated stream | |||||
* @param [in] flag dump flag or others function flag | |||||
* @param [in] stm associated stream | |||||
* @param [in] flags dump flag or others function flag | |||||
* @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
* @return RT_ERROR_INVALID_VALUE for error input | * @return RT_ERROR_INVALID_VALUE for error input | ||||
*/ | */ | ||||
RTS_API rtError_t rtAicpuKernelLaunchWithFlag(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, | RTS_API rtError_t rtAicpuKernelLaunchWithFlag(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, | ||||
const void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stm, uint32_t flags); | |||||
const rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm, | |||||
uint32_t flags); | |||||
/** | |||||
* @ingroup rtAicpuKernelLaunchWithFlagV2(in use) | |||||
* @brief launch cpu kernel to device with dump identifier | |||||
* @param [in] launchNames names for kernel launch | |||||
* @param [in] blockDim block dimentions | |||||
* @param [in] argsInfo argments address for kernel function | |||||
* @param [in] smDesc shared memory description | |||||
* @param [in] stm associated stream | |||||
* @param [in] flags dump flag or others function flag | |||||
* @return RT_ERROR_NONE for ok | |||||
* @return RT_ERROR_INVALID_VALUE for error input | |||||
*/ | |||||
RTS_API rtError_t rtAicpuKernelLaunchWithFlagV2(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, | |||||
const rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm, | |||||
uint32_t flags); | |||||
/** | /** | ||||
* @ingroup rt_kernel | * @ingroup rt_kernel | ||||
@@ -57,6 +57,14 @@ extern "C" { | |||||
#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P (0x4000U) // Malloc mem only use huge page, use for p2p, 0x1U << 14U | #define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P (0x4000U) // Malloc mem only use huge page, use for p2p, 0x1U << 14U | ||||
#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P (0x8000U) // Malloc mem only use default page, use for p2p, 0x1U << 15U | #define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P (0x8000U) // Malloc mem only use default page, use for p2p, 0x1U << 15U | ||||
/** | |||||
* @ingroup dvrt_mem | |||||
* @brief memory attribute | |||||
*/ | |||||
#define RT_MEMORY_ATTRIBUTE_DEFAULT (0x0U) | |||||
// memory read only attribute, now only dvpp memory support. | |||||
#define RT_MEMORY_ATTRIBUTE_READONLY (0x100000U) // Malloc readonly, 1<<20. | |||||
#define MEM_ALLOC_TYPE_BIT (0x3FFU) // mem type bit in <0, 9> | #define MEM_ALLOC_TYPE_BIT (0x3FFU) // mem type bit in <0, 9> | ||||
/** | /** | ||||
@@ -232,6 +240,18 @@ RTS_API rtError_t rtDvppMalloc(void **devPtr, uint64_t size); | |||||
/** | /** | ||||
* @ingroup dvrt_mem | * @ingroup dvrt_mem | ||||
* @brief alloc device memory for dvpp, support set flag | |||||
* @param [in|out] devPtr memory pointer | |||||
* @param [in] size memory size | |||||
* @param [in] flag mem flag, can use mem attribute set read only. | |||||
* @return RT_ERROR_NONE for ok | |||||
* @return RT_ERROR_INVALID_VALUE for error input | |||||
* @return others is error | |||||
*/ | |||||
RTS_API rtError_t rtDvppMallocWithFlag(void **devPtr, uint64_t size, uint32_t flag); | |||||
/** | |||||
* @ingroup dvrt_mem | |||||
* @brief free device memory for dvpp | * @brief free device memory for dvpp | ||||
* @param [in|out] devPtr memory pointer | * @param [in|out] devPtr memory pointer | ||||
* @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
@@ -36,4 +36,4 @@ RTS_API rtError_t rtFftsPlusTaskLaunchWithFlag(rtFftsPlusTaskInfo_t *fftsPlusTas | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | ||||
} | } | ||||
#endif | #endif | ||||
#endif // CCE_RUNTIME_RT_FFTS_PLUS_H | |||||
#endif // CCE_RUNTIME_RT_FFTS_PLUS_H |
@@ -82,6 +82,17 @@ RTS_API rtError_t rtStreamWaitEvent(rtStream_t stm, rtEvent_t evt); | |||||
/** | /** | ||||
* @ingroup dvrt_stream | * @ingroup dvrt_stream | ||||
* @brief wait an recorded event for stream, used for 1951 pg1 | |||||
* @param [in] stm the wait stream | |||||
* @param [in] event the event to wait | |||||
* @param [in] timeout timeout value for 1951 pg1 | |||||
* @return RT_ERROR_NONE for ok | |||||
* @return RT_ERROR_INVALID_VALUE for error input | |||||
*/ | |||||
RTS_API rtError_t rtStreamWaitEventWithTimeout(rtStream_t stm, rtEvent_t evt, uint32_t timeout); | |||||
/** | |||||
* @ingroup dvrt_stream | |||||
* @brief wait stream to be complete | * @brief wait stream to be complete | ||||
* @param [in] stm stream to wait | * @param [in] stm stream to wait | ||||
* @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
@@ -36,6 +36,7 @@ | |||||
#define PROF_SUBTASK_TIME 0x0000040000000ULL | #define PROF_SUBTASK_TIME 0x0000040000000ULL | ||||
#define PROF_OP_DETAIL 0x0000080000000ULL | #define PROF_OP_DETAIL 0x0000080000000ULL | ||||
#define PROF_AICPU_MODEL 0x4000000000000000ULL | |||||
#define PROF_MODEL_LOAD 0x8000000000000000ULL | #define PROF_MODEL_LOAD 0x8000000000000000ULL | ||||
#define PROF_TASK_TRACE (PROF_MODEL_EXECUTE | PROF_RUNTIME_TRACE | PROF_TRAINING_TRACE | \ | #define PROF_TASK_TRACE (PROF_MODEL_EXECUTE | PROF_RUNTIME_TRACE | PROF_TRAINING_TRACE | \ | ||||
@@ -69,6 +70,7 @@ | |||||
#define PROF_SUBTASK_TIME_MASK 0x0000040000000ULL | #define PROF_SUBTASK_TIME_MASK 0x0000040000000ULL | ||||
#define PROF_OP_DETAIL_MASK 0x0000080000000ULL | #define PROF_OP_DETAIL_MASK 0x0000080000000ULL | ||||
#define PROF_AICPU_MODEL_MASK 0x4000000000000000ULL | |||||
#define PROF_MODEL_LOAD_MASK 0x8000000000000000ULL | #define PROF_MODEL_LOAD_MASK 0x8000000000000000ULL | ||||
#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) | #if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER)) | ||||
@@ -114,6 +114,7 @@ enum MsprofCtrlCallbackType { | |||||
MSPROF_CTRL_INIT_ACL_JSON, // start pro with acl.json | MSPROF_CTRL_INIT_ACL_JSON, // start pro with acl.json | ||||
MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options | MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options | ||||
MSPROF_CTRL_FINALIZE, // stop profiling | MSPROF_CTRL_FINALIZE, // stop profiling | ||||
MSPROF_CTRL_INIT_HELPER, // start profiling in helper device | |||||
MSPROF_CTRL_INIT_DYNA = 0xFF, // start profiling for dynamic profiling | MSPROF_CTRL_INIT_DYNA = 0xFF, // start profiling for dynamic profiling | ||||
}; | }; | ||||
@@ -28,6 +28,7 @@ enum MsprofDataTag { | |||||
MSPROF_RUNTIME_DATA_TAG_API = 40, //runtime data tag, range: 40~59 | MSPROF_RUNTIME_DATA_TAG_API = 40, //runtime data tag, range: 40~59 | ||||
MSPROF_RUNTIME_DATA_TAG_TRACK = 41, | MSPROF_RUNTIME_DATA_TAG_TRACK = 41, | ||||
MSPROF_AICPU_DATA_TAG = 60, //aicpu data tag, range: 60~79 | MSPROF_AICPU_DATA_TAG = 60, //aicpu data tag, range: 60~79 | ||||
MSPROF_AICPU_MODEL_TAG = 61, | |||||
MSPROF_HCCL_DATA_TAG = 80, //hccl data tag, range: 80~99 | MSPROF_HCCL_DATA_TAG = 80, //hccl data tag, range: 80~99 | ||||
MSPROF_DP_DATA_TAG = 100, //dp data tag, range: 100~119 | MSPROF_DP_DATA_TAG = 100, //dp data tag, range: 100~119 | ||||
MSPROF_MSPROFTX_DATA_TAG = 120, //hccl data tag, range: 120~139 | MSPROF_MSPROFTX_DATA_TAG = 120, //hccl data tag, range: 120~139 | ||||
@@ -52,6 +53,16 @@ struct MsprofMixData { | |||||
} data; | } data; | ||||
}; | }; | ||||
#define PATH_LEN_MAX 1023 | |||||
#define PARAM_LEN_MAX 4095 | |||||
struct MsprofCommandHandleParams { | |||||
uint32_t pathLen; | |||||
uint32_t storageLimit; // MB | |||||
uint32_t profDataLen; | |||||
char path[PATH_LEN_MAX + 1]; | |||||
char profData[PARAM_LEN_MAX + 1]; | |||||
}; | |||||
/** | /** | ||||
* @brief profiling command info | * @brief profiling command info | ||||
*/ | */ | ||||
@@ -63,6 +74,7 @@ struct MsprofCommandHandle { | |||||
uint32_t devIdList[MSPROF_MAX_DEV_NUM]; | uint32_t devIdList[MSPROF_MAX_DEV_NUM]; | ||||
uint32_t modelId; | uint32_t modelId; | ||||
uint32_t type; | uint32_t type; | ||||
struct MsprofCommandHandleParams params; | |||||
}; | }; | ||||
/** | /** | ||||
@@ -305,6 +317,19 @@ struct MsprofAicpuProfData { | |||||
uint8_t reserve[MSPROF_AICPU_DATA_RESERVE_BYTES]; | uint8_t reserve[MSPROF_AICPU_DATA_RESERVE_BYTES]; | ||||
}; | }; | ||||
struct MsprofAicpuModelProfData { | |||||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||||
uint16_t dataTag = MSPROF_AICPU_MODEL_TAG; | |||||
uint32_t rsv; // Ensure 8-byte alignment | |||||
uint64_t timeStamp; | |||||
uint64_t indexId; | |||||
uint32_t modelId; | |||||
uint16_t tagId; | |||||
uint16_t rsv1; | |||||
uint64_t eventId; | |||||
uint8_t reserve[24]; | |||||
}; | |||||
/** | /** | ||||
* @brief struct of data reported by DP | * @brief struct of data reported by DP | ||||
*/ | */ | ||||