Browse Source

feat(dnn/atlas): add atlas stub

GitOrigin-RevId: c63294378e
release-1.2
Megvii Engine Team 4 years ago
parent
commit
d1fbec4fe2
15 changed files with 7607 additions and 2 deletions
  1. +2
    -2
      CMakeLists.txt
  2. +6
    -0
      dnn/atlas-stub/CMakeLists.txt
  3. +50
    -0
      dnn/atlas-stub/include/acl/acl.h
  4. +367
    -0
      dnn/atlas-stub/include/acl/acl_base.h
  5. +932
    -0
      dnn/atlas-stub/include/acl/acl_mdl.h
  6. +454
    -0
      dnn/atlas-stub/include/acl/acl_op.h
  7. +656
    -0
      dnn/atlas-stub/include/acl/acl_rt.h
  8. +413
    -0
      dnn/atlas-stub/include/acl/ops/acl_cblas.h
  9. +1675
    -0
      dnn/atlas-stub/include/acl/ops/acl_dvpp.h
  10. +261
    -0
      dnn/atlas-stub/include/acl/ops/acl_fv.h
  11. +94
    -0
      dnn/atlas-stub/src/libacl_cblas-wrap.cpp
  12. +238
    -0
      dnn/atlas-stub/src/libacl_cblas-wrap.h
  13. +172
    -0
      dnn/atlas-stub/src/libatlas-wrap.cpp
  14. +2286
    -0
      dnn/atlas-stub/src/libatlas-wrap.h
  15. +1
    -0
      scripts/whl/manylinux2010/do_build.sh

+ 2
- 2
CMakeLists.txt View File

@@ -507,8 +507,8 @@ endif ()


if(MGE_WITH_ATLAS)
include(cmake/aclrt.cmake)
list(APPEND MGE_ATLAS_LIBS libascendcl)
add_subdirectory(dnn/atlas-stub)
list(APPEND MGE_ATLAS_LIBS atlas-stub)
set(MGE_ATLAS_LIBS "${MGE_ATLAS_LIBS}")
set(MGB_ATLAS ${MGE_WITH_ATLAS})
endif()


+ 6
- 0
dnn/atlas-stub/CMakeLists.txt View File

@@ -0,0 +1,6 @@
# Static library built from the wrapper source for the ACL runtime API.
add_library(atlas-stub STATIC src/libatlas-wrap.cpp)
# Expose the bundled headers (include/) to consumers while building in-tree.
target_include_directories(atlas-stub PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
# Install the stub as part of the export set named by MGE_EXPORT_TARGETS.
install(TARGETS atlas-stub EXPORT ${MGE_EXPORT_TARGETS})

# Static library built from the wrapper source for the ACL cblas API.
# NOTE(review): unlike atlas-stub, this target has no install() rule here -
# confirm that is intentional.
add_library(acl-cblas STATIC src/libacl_cblas-wrap.cpp)
target_include_directories(acl-cblas PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)

+ 50
- 0
dnn/atlas-stub/include/acl/acl.h View File

@@ -0,0 +1,50 @@
/**
* @file acl.h
*
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/

#ifndef INC_EXTERNAL_ACL_ACL_H_
#define INC_EXTERNAL_ACL_ACL_H_

#include "acl_rt.h"
#include "acl_op.h"
#include "acl_mdl.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
* @ingroup AscendCL
* @brief acl initialize
*
* @par Restriction
* The aclInit interface can be called only once in a process.
* @param configPath [IN] the config path; it can be NULL
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclInit(const char *configPath);

/**
* @ingroup AscendCL
* @brief acl finalize
*
* @par Restriction
* Need to call aclFinalize before the process exits.
* After calling aclFinalize, the services cannot continue to be used normally.
*
* The parameter list is spelled (void) so that C callers get a real prototype;
* an empty () in C leaves the arguments unchecked. In C++ both forms are identical.
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclFinalize(void);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_H_

+ 367
- 0
dnn/atlas-stub/include/acl/acl_base.h View File

@@ -0,0 +1,367 @@
/**
* @file acl_base.h
*
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/

#ifndef INC_EXTERNAL_ACL_ACL_BASE_H_
#define INC_EXTERNAL_ACL_ACL_BASE_H_

#include <stdint.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

// ACL_FUNC_VISIBILITY prefixes every API declaration in these headers.
// When FUNC_VISIBILITY is defined it expands to the GCC-style "default"
// symbol-visibility attribute; otherwise it expands to nothing.
#ifdef FUNC_VISIBILITY
#define ACL_FUNC_VISIBILITY __attribute__((visibility("default")))
#else
#define ACL_FUNC_VISIBILITY
#endif

// Runtime handles are exposed as untyped pointers.
typedef void *aclrtStream;
typedef void *aclrtEvent;
typedef void *aclrtContext;
// Status code type returned by the API; see the ACL_ERROR_* constants.
typedef int aclError;
// 16-bit storage type; convert with aclFloat16ToFloat / aclFloatToFloat16.
typedef uint16_t aclFloat16;
// Forward-declared in this header; used only through pointers by the API below.
typedef struct aclDataBuffer aclDataBuffer;
typedef struct aclTensorDesc aclTensorDesc;

// Status codes carried by aclError.
//
// Each constant is `static` so that every translation unit including this
// header gets its own internal-linkage copy. Without `static`, a file-scope
// `const int` has external linkage in C, and including the header from more
// than one C source file produces duplicate-definition link errors. In C++
// namespace-scope const objects already have internal linkage, so this does
// not change C++ behaviour.
static const int ACL_ERROR_NONE = 0;

static const int ACL_ERROR_INVALID_PARAM = 100000;
static const int ACL_ERROR_UNINITIALIZE = 100001;
static const int ACL_ERROR_REPEAT_INITIALIZE = 100002;
static const int ACL_ERROR_INVALID_FILE = 100003;
static const int ACL_ERROR_WRITE_FILE = 100004;
static const int ACL_ERROR_INVALID_FILE_SIZE = 100005;
static const int ACL_ERROR_PARSE_FILE = 100006;
static const int ACL_ERROR_FILE_MISSING_ATTR = 100007;
static const int ACL_ERROR_FILE_ATTR_INVALID = 100008;
static const int ACL_ERROR_INVALID_DUMP_CONFIG = 100009;
static const int ACL_ERROR_INVALID_PROFILING_CONFIG = 100010;
static const int ACL_ERROR_INVALID_MODEL_ID = 100011;
static const int ACL_ERROR_DESERIALIZE_MODEL = 100012;
static const int ACL_ERROR_PARSE_MODEL = 100013;
static const int ACL_ERROR_READ_MODEL_FAILURE = 100014;
static const int ACL_ERROR_MODEL_SIZE_INVALID = 100015;
static const int ACL_ERROR_MODEL_MISSING_ATTR = 100016;
static const int ACL_ERROR_MODEL_INPUT_NOT_MATCH = 100017;
static const int ACL_ERROR_MODEL_OUTPUT_NOT_MATCH = 100018;
static const int ACL_ERROR_MODEL_NOT_DYNAMIC = 100019;
static const int ACL_ERROR_OP_TYPE_NOT_MATCH = 100020;
static const int ACL_ERROR_OP_INPUT_NOT_MATCH = 100021;
static const int ACL_ERROR_OP_OUTPUT_NOT_MATCH = 100022;
static const int ACL_ERROR_OP_ATTR_NOT_MATCH = 100023;
static const int ACL_ERROR_OP_NOT_FOUND = 100024;
static const int ACL_ERROR_OP_LOAD_FAILED = 100025;
static const int ACL_ERROR_UNSUPPORTED_DATA_TYPE = 100026;
static const int ACL_ERROR_FORMAT_NOT_MATCH = 100027;
static const int ACL_ERROR_BIN_SELECTOR_NOT_REGISTERED = 100028;
static const int ACL_ERROR_KERNEL_NOT_FOUND = 100029;
static const int ACL_ERROR_BIN_SELECTOR_ALREADY_REGISTERED = 100030;
static const int ACL_ERROR_KERNEL_ALREADY_REGISTERED = 100031;
static const int ACL_ERROR_INVALID_QUEUE_ID = 100032;
static const int ACL_ERROR_REPEAT_SUBSCRIBE = 100033;
static const int ACL_ERROR_STREAM_NOT_SUBSCRIBE = 100034;
static const int ACL_ERROR_THREAD_NOT_SUBSCRIBE = 100035;
static const int ACL_ERROR_WAIT_CALLBACK_TIMEOUT = 100036;
static const int ACL_ERROR_REPEAT_FINALIZE = 100037;
static const int ACL_ERROR_NOT_STATIC_AIPP = 100038;

static const int ACL_ERROR_BAD_ALLOC = 200000;
static const int ACL_ERROR_API_NOT_SUPPORT = 200001;
static const int ACL_ERROR_INVALID_DEVICE = 200002;
static const int ACL_ERROR_MEMORY_ADDRESS_UNALIGNED = 200003;
static const int ACL_ERROR_RESOURCE_NOT_MATCH = 200004;
static const int ACL_ERROR_INVALID_RESOURCE_HANDLE = 200005;
static const int ACL_ERROR_FEATURE_UNSUPPORTED = 200006;

static const int ACL_ERROR_STORAGE_OVER_LIMIT = 300000;

static const int ACL_ERROR_INTERNAL_ERROR = 500000;
static const int ACL_ERROR_FAILURE = 500001;
static const int ACL_ERROR_GE_FAILURE = 500002;
static const int ACL_ERROR_RT_FAILURE = 500003;
static const int ACL_ERROR_DRV_FAILURE = 500004;
static const int ACL_ERROR_PROFILING_FAILURE = 500005;

/**
* @ingroup AscendCL
* @brief element data types accepted by the tensor description interfaces
*
* ACL_DT_UNDEFINED (-1) is the value aclGetTensorDescType reports for a null
* description. The value 5 is not assigned to any enumerator in this header.
*/
typedef enum {
ACL_DT_UNDEFINED = -1,
ACL_FLOAT = 0,
ACL_FLOAT16 = 1,
ACL_INT8 = 2,
ACL_INT32 = 3,
ACL_UINT8 = 4,
ACL_INT16 = 6,
ACL_UINT16 = 7,
ACL_UINT32 = 8,
ACL_INT64 = 9,
ACL_UINT64 = 10,
ACL_DOUBLE = 11,
ACL_BOOL = 12,
} aclDataType;

/**
* @ingroup AscendCL
* @brief tensor layout identifiers used by the tensor description interfaces
*
* ACL_FORMAT_UNDEFINED (-1) is the value aclGetTensorDescFormat reports for a
* null description. The numbering is not contiguous: values run 0..4 and then
* jump to 29 for ACL_FORMAT_FRACTAL_NZ.
*/
typedef enum {
ACL_FORMAT_UNDEFINED = -1,
ACL_FORMAT_NCHW = 0,
ACL_FORMAT_NHWC = 1,
ACL_FORMAT_ND = 2,
ACL_FORMAT_NC1HWC0 = 3,
ACL_FORMAT_FRACTAL_Z = 4,
ACL_FORMAT_FRACTAL_NZ = 29,
} aclFormat;

/**
* @ingroup AscendCL
* @brief log levels passed as the first argument of aclAppLog / ACL_APP_LOG
*/
typedef enum {
ACL_DEBUG = 0,
ACL_INFO = 1,
ACL_WARNING = 2,
ACL_ERROR = 3,
} aclLogLevel;

/**
* @ingroup AscendCL
* @brief Converts data of type aclFloat16 to data of type float
*
* @param value [IN] Data to be converted
* @retval Transformed data
*/
ACL_FUNC_VISIBILITY float aclFloat16ToFloat(aclFloat16 value);

/**
* @ingroup AscendCL
* @brief Converts data of type float to data of type aclFloat16
*
* @param value [IN] Data to be converted
* @retval Transformed data
*/
ACL_FUNC_VISIBILITY aclFloat16 aclFloatToFloat16(float value);

/**
* @ingroup AscendCL
* @brief create data of aclDataBuffer
*
* @param data [IN] pointer to data
* @li Need to be managed by the user,
* call aclrtMalloc interface to apply for memory,
* call aclrtFree interface to release memory
* @param size [IN] size of data in bytes
* @retval pointer to created instance. nullptr if run out of memory
*
* @see aclrtMalloc | aclrtFree
*/
ACL_FUNC_VISIBILITY aclDataBuffer *aclCreateDataBuffer(void *data, size_t size);

/**
* @ingroup AscendCL
* @brief destroy data of aclDataBuffer
*
* @par Function
* Only the aclDataBuffer type data is destroyed here.
* The memory of the data passed in when the aclCreateDataBuffer interface
* is called to create aclDataBuffer type data must be released by the user.
* @param dataBuffer [IN] pointer to the aclDataBuffer
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclCreateDataBuffer
*/
ACL_FUNC_VISIBILITY aclError aclDestroyDataBuffer(const aclDataBuffer *dataBuffer);

/**
* @ingroup AscendCL
* @brief get data address from aclDataBuffer
*
* @param dataBuffer [IN] pointer to the data of aclDataBuffer
* @retval data address
*/
ACL_FUNC_VISIBILITY void *aclGetDataBufferAddr(const aclDataBuffer *dataBuffer);

/**
* @ingroup AscendCL
* @brief get data size of aclDataBuffer
*
* NOTE(review): the return type is uint32_t while aclCreateDataBuffer takes the
* size as size_t; confirm how sizes that do not fit in 32 bits are reported.
*
* @param dataBuffer [IN] pointer to the data of aclDataBuffer
* @retval data size
*/
ACL_FUNC_VISIBILITY uint32_t aclGetDataBufferSize(const aclDataBuffer *dataBuffer);

/**
* @ingroup AscendCL
* @brief get size of aclDataType
*
* @param dataType [IN] aclDataType data the size to get
* @retval size of the aclDataType
*/
ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType);

// interfaces of tensor desc
/**
* @ingroup AscendCL
* @brief create data aclTensorDesc
*
* @param dataType [IN] Data types described by tensor
* @param numDims [IN] the number of dimensions of the shape
* @param dims [IN] the size of the specified dimension
* @param format [IN] tensor format
* @retval aclTensorDesc pointer.
* @retval nullptr if param is invalid or run out of memory
*/
ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType,
int numDims,
const int64_t *dims,
aclFormat format);

/**
* @ingroup AscendCL
* @brief destroy data aclTensorDesc
*
* @param desc [IN] pointer to the data of aclTensorDesc to destroy
*/
ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc);

/**
* @ingroup AscendCL
* @brief get data type specified by the tensor description
*
* @param desc [IN] pointer to the instance of aclTensorDesc
* @retval data type specified by the tensor description.
* @retval ACL_DT_UNDEFINED if description is null
*/
ACL_FUNC_VISIBILITY aclDataType aclGetTensorDescType(const aclTensorDesc *desc);

/**
* @ingroup AscendCL
* @brief get data format specified by the tensor description
*
* @param desc [IN] pointer to the instance of aclTensorDesc
* @retval data format specified by the tensor description.
* @retval ACL_FORMAT_UNDEFINED if description is null
*/
ACL_FUNC_VISIBILITY aclFormat aclGetTensorDescFormat(const aclTensorDesc *desc);

/**
* @ingroup AscendCL
* @brief get tensor size specified by the tensor description
*
* @param desc [IN] pointer to the instance of aclTensorDesc
* @retval data size specified by the tensor description.
* @retval 0 if description is null
*/
ACL_FUNC_VISIBILITY size_t aclGetTensorDescSize(const aclTensorDesc *desc);

/**
* @ingroup AscendCL
* @brief get element count specified by the tensor description
*
* @param desc [IN] pointer to the instance of aclTensorDesc
* @retval element count specified by the tensor description.
* @retval 0 if description is null
*/
ACL_FUNC_VISIBILITY size_t aclGetTensorDescElementCount(const aclTensorDesc *desc);

/**
* @ingroup AscendCL
* @brief get number of dims specified by the tensor description
*
* @param desc [IN] pointer to the instance of aclTensorDesc
* @retval number of dims specified by the tensor description.
* @retval 0 if description is null
*/
ACL_FUNC_VISIBILITY size_t aclGetTensorDescNumDims(const aclTensorDesc *desc);

/**
* @ingroup AscendCL
* @brief Get the size of the specified dim in the tensor description
*
* @param desc [IN] pointer to the instance of aclTensorDesc
* @param index [IN] index of dims, start from 0.
* @retval dim specified by the tensor description and index.
* @retval -1 if description or index is invalid
*/
ACL_FUNC_VISIBILITY int64_t aclGetTensorDescDim(const aclTensorDesc *desc, size_t index);

/**
* @ingroup AscendCL
* @brief set tensor description name
*
* @param desc [IN] pointer to the instance of aclTensorDesc
* @param name [IN] tensor description name
*/
ACL_FUNC_VISIBILITY void aclSetTensorDescName(aclTensorDesc *desc, const char *name);

/**
* @ingroup AscendCL
* @brief get tensor description name
*
* @param desc [IN] pointer to the instance of aclTensorDesc
* @retval tensor description name.
* @retval empty string if description is null
*/
ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc);

/**
* @ingroup AscendCL
* @brief Convert the format in the source aclTensorDesc according to
* the specified dstFormat to generate a new target aclTensorDesc.
* The format in the source aclTensorDesc remains unchanged.
*
* @param srcDesc [IN] pointer to the source tensor desc
* @param dstFormat [IN] destination format
* @param dstDesc [OUT] pointer to the pointer to the destination tensor desc
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat,
aclTensorDesc **dstDesc);

/**
* @ingroup AscendCL
* @brief Set the storage format specified by the tensor description
*
* @param desc [IN|OUT] pointer to the instance of aclTensorDesc
* @param format [IN] the storage format
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclSetTensorStorageFormat(aclTensorDesc *desc, aclFormat format);

/**
* @ingroup AscendCL
* @brief Set the storage shape specified by the tensor description
*
* @param desc [IN|OUT] pointer to the instance of aclTensorDesc
* @param numDims [IN] the number of dimensions of the shape
* @param dims [IN] the size of the specified dimension
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclSetTensorStorageShape(aclTensorDesc *desc, int numDims, const int64_t *dims);

/**
* @ingroup AscendCL
* @brief an interface for users to output APP logs
*
* @param logLevel [IN] the level of current log
* @param func [IN] the function where the log is located
* @param file [IN] the file where the log is located
* @param line [IN] Number of source lines where the log is located
* @param fmt [IN] the format of current log
* @param ... [IN] the value of current log
*/
ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line,
const char *fmt, ...);

// Convenience wrapper around aclAppLog that fills in the calling function,
// source file and line number automatically.
// `##__VA_ARGS__` is the GNU extension that drops the preceding comma when no
// variadic arguments are supplied, so ACL_APP_LOG(level, "text") also works
// on compilers that support it.
#define ACL_APP_LOG(level, fmt, ...) \
aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__)

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_BASE_H_

+ 932
- 0
dnn/atlas-stub/include/acl/acl_mdl.h View File

@@ -0,0 +1,932 @@
/**
* @file acl_mdl.h
*
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/

#ifndef INC_EXTERNAL_ACL_ACL_MODEL_H_
#define INC_EXTERNAL_ACL_ACL_MODEL_H_

#include <stddef.h>
#include <stdint.h>

#include "acl_base.h"
#include "acl_rt.h"

#ifdef __cplusplus
extern "C" {
#endif

// Fixed capacities of the arrays embedded in the structures declared below:
// aclmdlIODims::dims, aclmdlIODims::name, aclmdlBatch::batch, aclmdlHW::hw
// and aclAippInfo::outDims respectively.
#define ACL_MAX_DIM_CNT 128
#define ACL_MAX_TENSOR_NAME_LEN 128
#define ACL_MAX_BATCH_NUM 128
#define ACL_MAX_HW_NUM 128
#define ACL_MAX_SHAPE_COUNT 128

// Name strings for the dynamic tensor and dynamic AIPP data.
// NOTE(review): presumably these identify special model inputs - confirm against the ACL documentation.
#define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data"
#define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data"

typedef struct aclmdlDataset aclmdlDataset;
typedef struct aclmdlDesc aclmdlDesc;
typedef struct aclmdlAIPP aclmdlAIPP;
typedef struct aclAippExtendInfo aclAippExtendInfo;

/**
* @ingroup AscendCL
* @brief input image formats recorded in aclAippInfo::inputFormat
*
* Enumerators are numbered sequentially starting at 1 (ACL_YUV420SP_U8);
* ACL_AIPP_RESERVED is pinned to 0xffff.
*/
typedef enum {
ACL_YUV420SP_U8 = 1,
ACL_XRGB8888_U8,
ACL_RGB888_U8,
ACL_YUV400_U8,
ACL_NC1HWC0DI_FP16,
ACL_NC1HWC0DI_S8,
ACL_ARGB8888_U8,
ACL_YUYV_U8,
ACL_YUV422SP_U8,
ACL_AYUV444_U8,
ACL_RAW10,
ACL_RAW12,
ACL_RAW16,
ACL_RAW24,
ACL_AIPP_RESERVED = 0xffff,
} aclAippInputFormat;

typedef struct aclmdlIODims {
char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */
size_t dimCount; /**< dim array count */
int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */
} aclmdlIODims;

typedef struct aclAippDims {
aclmdlIODims srcDims; /**< input dims before model transform */
size_t srcSize; /**< input size before model transform */
aclmdlIODims aippOutdims; /**< aipp output dims */
size_t aippOutSize; /**< aipp output size */
} aclAippDims;

typedef struct aclmdlBatch {
size_t batchCount; /**< batch array count */
uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */
} aclmdlBatch;

typedef struct aclmdlHW {
size_t hwCount; /**< height&width array count */
uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */
} aclmdlHW;

/**
* @ingroup AscendCL
* @brief AIPP configuration record
*
* Plain data structure; this header does not document the individual fields.
* NOTE(review): the comments below group members by their name prefixes only -
* confirm exact semantics and units against the ACL documentation.
*/
typedef struct aclAippInfo {
aclAippInputFormat inputFormat;
int32_t srcImageSizeW;
int32_t srcImageSizeH;
/* crop-related members */
int8_t cropSwitch;
int32_t loadStartPosW;
int32_t loadStartPosH;
int32_t cropSizeW;
int32_t cropSizeH;
/* resize-related members */
int8_t resizeSwitch;
int32_t resizeOutputW;
int32_t resizeOutputH;
/* padding-related members */
int8_t paddingSwitch;
int32_t leftPaddingSize;
int32_t rightPaddingSize;
int32_t topPaddingSize;
int32_t bottomPaddingSize;
/* switches followed by a 3x3 matrix and its input/output biases */
int8_t cscSwitch;
int8_t rbuvSwapSwitch;
int8_t axSwapSwitch;
int8_t singleLineMode;
int32_t matrixR0C0;
int32_t matrixR0C1;
int32_t matrixR0C2;
int32_t matrixR1C0;
int32_t matrixR1C1;
int32_t matrixR1C2;
int32_t matrixR2C0;
int32_t matrixR2C1;
int32_t matrixR2C2;
int32_t outputBias0;
int32_t outputBias1;
int32_t outputBias2;
int32_t inputBias0;
int32_t inputBias1;
int32_t inputBias2;
/* per-channel (0..3) mean / min / varReci values */
int32_t meanChn0;
int32_t meanChn1;
int32_t meanChn2;
int32_t meanChn3;
float minChn0;
float minChn1;
float minChn2;
float minChn3;
float varReciChn0;
float varReciChn1;
float varReciChn2;
float varReciChn3;
aclFormat srcFormat;
aclDataType srcDatatype;
size_t srcDimNum;
size_t shapeCount;
aclAippDims outDims[ACL_MAX_SHAPE_COUNT]; /**< capacity is ACL_MAX_SHAPE_COUNT entries */
aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */
} aclAippInfo;

/**
* @ingroup AscendCL
* @brief Create data of type aclmdlDesc
*
* The parameter list is spelled (void) so that C callers get a real prototype;
* an empty () in C leaves the arguments unchecked. In C++ both forms are identical.
*
* @retval the aclmdlDesc pointer
*/
ACL_FUNC_VISIBILITY aclmdlDesc *aclmdlCreateDesc(void);

/**
* @ingroup AscendCL
* @brief destroy data of type aclmdlDesc
*
* @param modelDesc [IN] Pointer to the aclmdlDesc to be destroyed
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlDestroyDesc(aclmdlDesc *modelDesc);

/**
* @ingroup AscendCL
* @brief Get aclmdlDesc data of the model according to the model ID
*
* @param modelDesc [OUT] aclmdlDesc pointer
* @param modelId [IN] model id
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetDesc(aclmdlDesc *modelDesc, uint32_t modelId);

/**
* @ingroup AscendCL
* @brief Get the number of inputs of the model
* according to the data of aclmdlDesc
*
* @param modelDesc [IN] aclmdlDesc pointer
* @retval number of model inputs described by the aclmdlDesc
*/
ACL_FUNC_VISIBILITY size_t aclmdlGetNumInputs(aclmdlDesc *modelDesc);

/**
* @ingroup AscendCL
* @brief Get the number of outputs of the model
* according to the data of aclmdlDesc
*
* @param modelDesc [IN] aclmdlDesc pointer
* @retval number of model outputs described by the aclmdlDesc
*/
ACL_FUNC_VISIBILITY size_t aclmdlGetNumOutputs(aclmdlDesc *modelDesc);

/**
* @ingroup AscendCL
* @brief Get the size of the specified input according to
* the data of type aclmdlDesc
*
* @param modelDesc [IN] aclmdlDesc pointer
* @param index [IN] index of the input whose size is to be obtained,
* the index value starts from 0
* @retval size of the specified input
*/
ACL_FUNC_VISIBILITY size_t aclmdlGetInputSizeByIndex(aclmdlDesc *modelDesc, size_t index);

/**
* @ingroup AscendCL
* @brief Get the size of the specified output according to
* the data of type aclmdlDesc
*
* @param modelDesc [IN] aclmdlDesc pointer
* @param index [IN] index of the output whose size is to be obtained,
* the index value starts from 0
* @retval size of the specified output
*/
ACL_FUNC_VISIBILITY size_t aclmdlGetOutputSizeByIndex(aclmdlDesc *modelDesc, size_t index);

/**
* @ingroup AscendCL
* @brief Create data of type aclmdlDataset
*
* The parameter list is spelled (void) so that C callers get a real prototype;
* an empty () in C leaves the arguments unchecked. In C++ both forms are identical.
*
* @retval the aclmdlDataset pointer
*/
ACL_FUNC_VISIBILITY aclmdlDataset *aclmdlCreateDataset(void);

/**
* @ingroup AscendCL
* @brief destroy data of type aclmdlDataset
*
* @param dataset [IN] Pointer to aclmdlDataset to be destroyed
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlDestroyDataset(const aclmdlDataset *dataset);

/**
* @ingroup AscendCL
* @brief Add aclDataBuffer to aclmdlDataset
*
* @param dataset [IN|OUT] aclmdlDataset address of aclDataBuffer to be added
* @param dataBuffer [IN] aclDataBuffer address to be added
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlAddDatasetBuffer(aclmdlDataset *dataset,
aclDataBuffer *dataBuffer);

/**
* @ingroup AscendCL
* @brief Get the number of aclDataBuffer in aclmdlDataset
*
* @param dataset [IN] aclmdlDataset pointer
* @retval the number of aclDataBuffer
*/
ACL_FUNC_VISIBILITY size_t aclmdlGetDatasetNumBuffers(const aclmdlDataset *dataset);

/**
* @ingroup AscendCL
* @brief Get the aclDataBuffer in aclmdlDataset by index
*
* @param dataset [IN] aclmdlDataset pointer
* @param index [IN] the index of aclDataBuffer
* @retval On success, the address of the aclDataBuffer
* @retval On failure, NULL
*/
ACL_FUNC_VISIBILITY aclDataBuffer *aclmdlGetDatasetBuffer(const aclmdlDataset *dataset,
size_t index);

/**
* @ingroup AscendCL
* @brief Load offline model data from files
* and manage memory internally by the system
*
* @par Function
* After the system finishes loading the model,
* the model ID returned is used as a mark to identify the model
* during subsequent operations.
* @param modelPath [IN] Storage path for offline model files
* @param modelId [OUT] Model ID generated after
* the system finishes loading the model
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t *modelId);

/**
* @ingroup AscendCL
* @brief Load offline model data from memory and manage the memory of
* model running internally by the system
*
* @par Function
* After the system finishes loading the model,
* the model ID returned is used as a mark to identify the model
* during subsequent operations
* @param model [IN] Model data stored in memory
* @param modelSize [IN] model data size
* @param modelId [OUT] Model ID generated after
* the system finishes loading the model
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize,
uint32_t *modelId);

/**
* @ingroup AscendCL
* @brief Load offline model data from a file,
* and the user manages the memory of the model run by itself
*
* @par Function
* After the system finishes loading the model,
* the model ID returned is used as a mark to identify the model
* during subsequent operations.
* @param modelPath [IN] Storage path for offline model files
* @param modelId [OUT] Model ID generated after the system finishes loading the model
* @param workPtr [IN] A pointer to the working memory
* required by the model on the Device; can be null
* @param workSize [IN] The amount of working memory required by the model
* @param weightPtr [IN] Pointer to model weight memory on Device
* @param weightSize [IN] The amount of weight memory required by the model
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath,
uint32_t *modelId, void *workPtr, size_t workSize,
void *weightPtr, size_t weightSize);

/**
* @ingroup AscendCL
* @brief Load offline model data from memory,
* and the user can manage the memory of model running
*
* @par Function
* After the system finishes loading the model,
* the model ID returned is used as a mark to identify the model
* during subsequent operations.
* @param model [IN] Model data stored in memory
* @param modelSize [IN] model data size
* @param modelId [OUT] Model ID generated after the system finishes loading the model
* @param workPtr [IN] A pointer to the working memory
* required by the model on the Device; can be null
* @param workSize [IN] work memory size
* @param weightPtr [IN] Pointer to model weight memory on Device; can be null
* @param weightSize [IN] The amount of weight memory required by the model
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize,
uint32_t *modelId, void *workPtr, size_t workSize,
void *weightPtr, size_t weightSize);

/**
* @ingroup AscendCL
* @brief load model from file with async queue
*
* @param modelPath [IN] model path
* @param modelId [OUT] return model id if load success
* @param inputQ [IN] input queue pointer
* @param inputQNum [IN] input queue num
* @param outputQ [IN] output queue pointer
* @param outputQNum [IN] output queue num
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint32_t *modelId, const uint32_t *inputQ,
size_t inputQNum, const uint32_t *outputQ, size_t outputQNum);

/**
* @ingroup AscendCL
* @brief load model from memory with async queue
*
* @param model [IN] model memory which user manages
* @param modelSize [IN] model size
* @param modelId [OUT] return model id if load success
* @param inputQ [IN] input queue pointer
* @param inputQNum [IN] input queue num
* @param outputQ [IN] output queue pointer
* @param outputQNum [IN] output queue num
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId,
const uint32_t *inputQ, size_t inputQNum, const uint32_t *outputQ, size_t outputQNum);

/**
* @ingroup AscendCL
* @brief Execute model synchronous inference until the inference result is returned
*
* @param modelId [IN] ID of the model to perform inference
* @param input [IN] Input data for model inference
* @param output [OUT] Output data for model inference
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset *input,
aclmdlDataset *output);

/**
* @ingroup AscendCL
* @brief Execute model inference asynchronously on the specified stream
*
* @param modelId [IN] ID of the model to perform inference
* @param input [IN] Input data for model inference
* @param output [OUT] Output data for model inference
* @param stream [IN] stream
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem
*/
ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input,
aclmdlDataset *output, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief unload model with model id
*
* @param modelId [IN] model id to be unloaded
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlUnload(uint32_t modelId);

/**
* @ingroup AscendCL
* @brief Get the weight memory size and working memory size
* required for model execution according to the model file
*
* @param fileName [IN] Model path to get memory information
* @param workSize [OUT] The amount of working memory for model executed
* @param weightSize [OUT] The amount of weight memory for model executed
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlQuerySize(const char *fileName, size_t *workSize, size_t *weightSize);

/**
* @ingroup AscendCL
* @brief Get the weight memory size and working memory size
* required for model execution according to the model data in memory
*
* @par Restriction
* The execution and weight memory is Device memory,
* and requires user application and release.
* @param model [IN] model memory which user manages
* @param modelSize [IN] model data size
* @param workSize [OUT] The amount of working memory for model executed
* @param weightSize [OUT] The amount of weight memory for model executed
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlQuerySizeFromMem(const void *model, size_t modelSize, size_t *workSize,
size_t *weightSize);

/**
* @ingroup AscendCL
* @brief In dynamic batch scenarios,
* it is used to set the number of images processed
* at one time during model inference
*
* @param modelId [IN] model id
* @param dataset [IN] data for model inference
* @param index [IN] index of dynamic tensor
* @param batchSize [IN] Number of images processed at a time during model inference
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetDynamicBatchSize(uint32_t modelId, aclmdlDataset *dataset, size_t index,
uint64_t batchSize);

/**
* @ingroup AscendCL
* @brief Sets the H and W of the specified input of the model
*
* @param modelId [IN] model id
* @param dataset [IN] data for model inference
* @param index [IN] index of dynamic tensor
* @param height [IN] model height
* @param width [IN] model width
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetDynamicHWSize(uint32_t modelId, aclmdlDataset *dataset, size_t index,
uint64_t height, uint64_t width);

/**
* @ingroup AscendCL
* @brief get input dims info
*
* @param modelDesc [IN] model description
* @param index [IN] input tensor index
* @param dims [OUT] dims info
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlGetInputDimsV2
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetInputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);

/**
* @ingroup AscendCL
* @brief get input dims info(version 2), especially for static aipp
* it is the same with aclmdlGetInputDims while model without static aipp
*
* @param modelDesc [IN] model description
* @param index [IN] input tensor index
* @param dims [OUT] dims info
*
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlGetInputDims
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetInputDimsV2(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);

/**
* @ingroup AscendCL
* @brief get output dims info
*
* @param modelDesc [IN] model description
* @param index [IN] output tensor index
* @param dims [OUT] dims info
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);

/**
* @ingroup AscendCL
* @brief get current output dims info
*
* @par Function
* The following use cases are supported:
* @li Get current output shape when model is dynamic and
* dynamic shape info is set
* @li Get max output shape when model is dynamic and
* dynamic shape info is not set
* @li Get actual output shape when model is static
*
* @param modelDesc [IN] model description
* @param index [IN] output tensor index
* @param dims [OUT] dims info
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc, size_t index, aclmdlIODims *dims);

/**
* @ingroup AscendCL
* @brief get input name by index
*
* @param modelDesc [IN] model description
* @param index [IN] input tensor index
* @retval input tensor name, which has the same life cycle as modelDesc
*/
ACL_FUNC_VISIBILITY const char *aclmdlGetInputNameByIndex(const aclmdlDesc *modelDesc, size_t index);

/**
* @ingroup AscendCL
* @brief get output name by index
*
* @param modelDesc [IN] model description
* @param index [IN] output tensor index
* @retval output tensor name,the same life cycle with modelDesc
*/
ACL_FUNC_VISIBILITY const char *aclmdlGetOutputNameByIndex(const aclmdlDesc *modelDesc, size_t index);

/**
* @ingroup AscendCL
* @brief get input format by index
*
* @param modelDesc [IN] model description
* @param index [IN] input tensor index
* @retval input tensor format
*/
ACL_FUNC_VISIBILITY aclFormat aclmdlGetInputFormat(const aclmdlDesc *modelDesc, size_t index);

/**
* @ingroup AscendCL
* @brief get output format by index
*
* @param modelDesc [IN] model description
* @param index [IN] output tensor index
* @retval output tensor format
*/
ACL_FUNC_VISIBILITY aclFormat aclmdlGetOutputFormat(const aclmdlDesc *modelDesc, size_t index);

/**
* @ingroup AscendCL
* @brief get input data type by index
*
* @param modelDesc [IN] model description
* @param index [IN] input tensor index
* @retval input tensor data type
*/
ACL_FUNC_VISIBILITY aclDataType aclmdlGetInputDataType(const aclmdlDesc *modelDesc, size_t index);

/**
* @ingroup AscendCL
* @brief get output data type by index
*
* @param modelDesc [IN] model description
* @param index [IN] output tensor index
* @retval output tensor data type
*/
ACL_FUNC_VISIBILITY aclDataType aclmdlGetOutputDataType(const aclmdlDesc *modelDesc, size_t index);

/**
* @ingroup AscendCL
* @brief get input tensor index by name
*
* @param modelDesc [IN] model description
* @param name [IN] input tensor name
* @param index [OUT] input tensor index
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetInputIndexByName(const aclmdlDesc *modelDesc, const char *name, size_t *index);

/**
* @ingroup AscendCL
* @brief get output tensor index by name
*
* @param modelDesc [IN] model description
* @param name [IN] output tensor name
* @param index [OUT] output tensor index
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetOutputIndexByName(const aclmdlDesc *modelDesc, const char *name, size_t *index);

/**
* @ingroup AscendCL
* @brief get dynamic batch info
*
* @param modelDesc [IN] model description
* @param batch [OUT] dynamic batch info
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetDynamicBatch(const aclmdlDesc *modelDesc, aclmdlBatch *batch);

/**
* @ingroup AscendCL
* @brief get dynamic height&width info
*
* @param modelDesc [IN] model description
* @param index [IN] input tensor index
* @param hw [OUT] dynamic height&width info
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetDynamicHW(const aclmdlDesc *modelDesc, size_t index, aclmdlHW *hw);

/**
* @ingroup AscendCL
* @brief Create data of type aclmdlAIPP
*
* @param batchSize [IN] batch size of model
* @retval the aclmdlAIPP pointer
*/
ACL_FUNC_VISIBILITY aclmdlAIPP *aclmdlCreateAIPP(uint64_t batchSize);

/**
* @ingroup AscendCL
* @brief destroy data of type aclmdlAIPP
*
* @param aippParmsSet [IN] Pointer for aclmdlAIPP to be destroyed
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclmdlDestroyAIPP(const aclmdlAIPP *aippParmsSet);

/**
* @ingroup AscendCL
* @brief set InputFormat of type aclmdlAIPP
*
* @param aippParmsSet [IN] Pointer for aclmdlAIPP
* @param inputFormat [IN] The inputFormat of aipp
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet, aclAippInputFormat inputFormat);

/**
* @ingroup AscendCL
* @brief set cscParms of type aclmdlAIPP
*
* @param aippParmsSet [IN] Pointer for aclmdlAIPP
* @param csc_switch [IN] Csc switch
* @param cscMatrixR0C0 [IN] Csc_matrix_r0_c0
* @param cscMatrixR0C1 [IN] Csc_matrix_r0_c1
* @param cscMatrixR0C2 [IN] Csc_matrix_r0_c2
* @param cscMatrixR1C0 [IN] Csc_matrix_r1_c0
* @param cscMatrixR1C1 [IN] Csc_matrix_r1_c1
* @param cscMatrixR1C2 [IN] Csc_matrix_r1_c2
* @param cscMatrixR2C0 [IN] Csc_matrix_r2_c0
* @param cscMatrixR2C1 [IN] Csc_matrix_r2_c1
* @param cscMatrixR2C2 [IN] Csc_matrix_r2_c2
* @param cscOutputBiasR0 [IN] Output Bias for RGB to YUV, element of row 0, unsigned number
* @param cscOutputBiasR1 [IN] Output Bias for RGB to YUV, element of row 1, unsigned number
* @param cscOutputBiasR2 [IN] Output Bias for RGB to YUV, element of row 2, unsigned number
* @param cscInputBiasR0 [IN] Input Bias for YUV to RGB, element of row 0, unsigned number
* @param cscInputBiasR1 [IN] Input Bias for YUV to RGB, element of row 1, unsigned number
* @param cscInputBiasR2 [IN] Input Bias for YUV to RGB, element of row 2, unsigned number
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch,
int16_t cscMatrixR0C0, int16_t cscMatrixR0C1, int16_t cscMatrixR0C2,
int16_t cscMatrixR1C0, int16_t cscMatrixR1C1, int16_t cscMatrixR1C2,
int16_t cscMatrixR2C0, int16_t cscMatrixR2C1, int16_t cscMatrixR2C2,
uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1,
uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0,
uint8_t cscInputBiasR1, uint8_t cscInputBiasR2);

/**
* @ingroup AscendCL
* @brief set rb/uv swap switch of type aclmdlAIPP
*
* @param aippParmsSet [IN] Pointer for aclmdlAIPP
* @param rbuvSwapSwitch [IN] rb/uv swap switch
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch);

/**
* @ingroup AscendCL
* @brief set RGBA->ARGB, YUVA->AYUV swap switch of type aclmdlAIPP
*
* @param aippParmsSet [IN] Pointer for aclmdlAIPP
* @param axSwapSwitch [IN] RGBA->ARGB, YUVA->AYUV swap switch
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch);

/**
* @ingroup AscendCL
* @brief set source image of type aclmdlAIPP
*
* @param aippParmsSet [IN] Pointer for aclmdlAIPP
* @param srcImageSizeW [IN] Source image width
* @param srcImageSizeH [IN] Source image height
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW,
int32_t srcImageSizeH);

/**
* @ingroup AscendCL
* @brief set resize switch of type aclmdlAIPP
*
* @param aippParmsSet [IN] Pointer for aclmdlAIPP
* @param scfSwitch [IN] Resize switch
* @param scfInputSizeW [IN] Input width of scf
* @param scfInputSizeH [IN] Input height of scf
* @param scfOutputSizeW [IN] Output width of scf
* @param scfOutputSizeH [IN] Output height of scf
* @param batchIndex [IN] Batch parameter index
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet,
int8_t scfSwitch,
int32_t scfInputSizeW,
int32_t scfInputSizeH,
int32_t scfOutputSizeW,
int32_t scfOutputSizeH,
uint64_t batchIndex);

/**
* @ingroup AscendCL
* @brief set cropParams of type aclmdlAIPP
*
* @param aippParmsSet [IN] Pointer for aclmdlAIPP
* @param cropSwitch [IN] Crop switch
* @param cropStartPosW [IN] The start horizontal position of cropping
* @param cropStartPosH [IN] The start vertical position of cropping
* @param cropSizeW [IN] Crop width
* @param cropSizeH [IN] Crop height
* @param batchIndex [IN] Batch parameter index
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet,
int8_t cropSwitch,
int32_t cropStartPosW,
int32_t cropStartPosH,
int32_t cropSizeW,
int32_t cropSizeH,
uint64_t batchIndex);

/**
* @ingroup AscendCL
* @brief set paddingParams of type aclmdlAIPP
*
* @param aippParmsSet [IN] Pointer for aclmdlAIPP
* @param paddingSwitch [IN] Padding switch
* @param paddingSizeTop [IN] Top padding size
* @param paddingSizeBottom [IN] Bottom padding size
* @param paddingSizeLeft [IN] Left padding size
* @param paddingSizeRight [IN] Right padding size
* @param batchIndex [IN] Batch parameter index
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch,
int32_t paddingSizeTop, int32_t paddingSizeBottom,
int32_t paddingSizeLeft, int32_t paddingSizeRight,
uint64_t batchIndex);

/**
* @ingroup AscendCL
* @brief set DtcPixelMean of type aclmdlAIPP
*
* @param aippParmsSet [IN] Pointer for aclmdlAIPP
* @param dtcPixelMeanChn0 [IN] Mean value of channel 0
* @param dtcPixelMeanChn1 [IN] Mean value of channel 1
* @param dtcPixelMeanChn2 [IN] Mean value of channel 2
* @param dtcPixelMeanChn3 [IN] Mean value of channel 3
* @param batchIndex [IN] Batch parameter index
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet,
int16_t dtcPixelMeanChn0,
int16_t dtcPixelMeanChn1,
int16_t dtcPixelMeanChn2,
int16_t dtcPixelMeanChn3,
uint64_t batchIndex);

/**
* @ingroup AscendCL
* @brief set DtcPixelMin of type aclmdlAIPP
*
* @param aippParmsSet [IN] Pointer for aclmdlAIPP
* @param dtcPixelMinChn0 [IN] Min value of channel 0
* @param dtcPixelMinChn1 [IN] Min value of channel 1
* @param dtcPixelMinChn2 [IN] Min value of channel 2
* @param dtcPixelMinChn3 [IN] Min value of channel 3
* @param batchIndex [IN] Batch parameter index
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet,
float dtcPixelMinChn0,
float dtcPixelMinChn1,
float dtcPixelMinChn2,
float dtcPixelMinChn3,
uint64_t batchIndex);

/**
* @ingroup AscendCL
* @brief set PixelVarReci of type aclmdlAIPP
*
* @param aippParmsSet [IN] Pointer for aclmdlAIPP
* @param dtcPixelVarReciChn0 [IN] sfr_dtc_pixel_variance_reci_ch0
* @param dtcPixelVarReciChn1 [IN] sfr_dtc_pixel_variance_reci_ch1
* @param dtcPixelVarReciChn2 [IN] sfr_dtc_pixel_variance_reci_ch2
* @param dtcPixelVarReciChn3 [IN] sfr_dtc_pixel_variance_reci_ch3
* @param batchIndex [IN] Batch parameter index
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet,
float dtcPixelVarReciChn0,
float dtcPixelVarReciChn1,
float dtcPixelVarReciChn2,
float dtcPixelVarReciChn3,
uint64_t batchIndex);

/**
* @ingroup AscendCL
* @brief set aipp parameters to model
*
* @param modelId [IN] model id
* @param dataset [IN] Pointer to the aclmdlDataset
* @param index [IN] index of dataBuffer
* @param aippParmsSet [IN] Pointer for aclmdlAIPP
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId,
aclmdlDataset *dataset,
size_t index,
const aclmdlAIPP *aippParmsSet);

/**
* @ingroup AscendCL
* @brief get static aipp parameters from model
*
* @param modelId [IN] model id
* @param index [IN] index of tensor
* @param aippinfo [OUT] Pointer for static aipp info
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval ACL_ERROR_MODEL_AIPP_NOT_EXIST The tensor of index is not configured with aipp
* @retval OtherValues Failure
*
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo);
#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_

+ 454
- 0
dnn/atlas-stub/include/acl/acl_op.h View File

@@ -0,0 +1,454 @@
/**
* @file acl_op.h
*
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#ifndef INC_EXTERNAL_ACL_ACL_OP_H_
#define INC_EXTERNAL_ACL_ACL_OP_H_

#include "acl_base.h"
#include "acl_rt.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
* Opaque types: the structs are only forward-declared in this header,
* so callers can only hold and pass pointers to them.
* aclopHandle - see aclopCreateHandle / aclopDestroyHandle
* aclopAttr - see aclopCreateAttr / aclopDestroyAttr
* aclopKernelDesc - see aclopSetKernelArgs / aclopSetKernelWorkspaceSizes
*/
typedef struct aclopHandle aclopHandle;
typedef struct aclopAttr aclopAttr;
typedef struct aclopKernelDesc aclopKernelDesc;

/**
* Callback type used to release a data buffer; aclopCreateKernel accepts one
* for deallocating the kernel bin data.
* data is the buffer to release and length its size
* (presumably in bytes - TODO confirm against the runtime documentation).
*/
typedef void (*aclDataDeallocator)(void *data, size_t length);

/**
* Compile flag constant with value 1 (the exact semantics are not documented
* in this header - TODO confirm against the AscendCL reference).
*
* Declared static: a non-static const object defined in a header has external
* linkage in C, so every C translation unit including this header would emit
* its own definition and the link would fail with duplicate symbols.
* In C++ a namespace-scope const object already has internal linkage,
* so this does not change behavior for C++ consumers.
*/
static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1;

/**
* Engine type selector, used as the enginetype argument of aclopCreateKernel.
* The enumerator values are spelled out explicitly; they are the same values
* the compiler assigns implicitly (0, 1, 2).
* Note that the enum tag is aclEngineType while the typedef name is aclopEngineType.
*/
typedef enum aclEngineType {
    ACL_ENGINE_SYS = 0,
    ACL_ENGINE_AICORE = 1,
    ACL_ENGINE_VECTOR = 2,
} aclopEngineType;

/**
* @ingroup AscendCL
* @brief Set base directory that contains single op models
*
* @par Restriction
* The aclopSetModelDir interface can be called only once in a process.
* @param modelDir [IN] path of the directory
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetModelDir(const char *modelDir);

/**
* @ingroup AscendCL
* @brief load single op models from memory
*
* @par Restriction
* The aclopLoad interface can be called more than one times in a process.
* @param model [IN] address of single op models
* @param modelSize [IN] size of single op models
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopLoad(const void *model, size_t modelSize);

/**
* @ingroup AscendCL
* @brief create data of type aclopAttr
*
* The parameter list is written as (void) so that C callers see a real
* prototype; an empty list "()" in C declares a function with unspecified
* parameters and disables argument checking. In C++ both forms are identical.
*
* @retval pointer to created instance.
* @retval nullptr if run out of memory
*/
ACL_FUNC_VISIBILITY aclopAttr *aclopCreateAttr(void);

/**
* @ingroup AscendCL
* @brief destroy data of type aclopAttr
*
* @param attr [IN] pointer to the instance of aclopAttr
*/
ACL_FUNC_VISIBILITY void aclopDestroyAttr(const aclopAttr *attr);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is bool
*
* @param attr [IN] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param attrValue [IN] attribute value
* false if attrValue is 0, true otherwise.
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrBool(aclopAttr *attr, const char *attrName, uint8_t attrValue);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is int64_t
*
* @param attr [IN] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param attrValue [IN] attribute value
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrInt(aclopAttr *attr, const char *attrName, int64_t attrValue);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is float
*
* @param attr [IN] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param attrValue [IN] attribute value
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrFloat(aclopAttr *attr, const char *attrName, float attrValue);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is string
*
* @param attr [IN] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param attrValue [IN] attribute value
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *attrName, const char *attrValue);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is list of bools
*
* @param attr [IN] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param numValues [IN] number of values
* @param values [IN] pointer to values.
* an element is false if it is 0, true otherwise.
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues,
const uint8_t *values);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is list of ints
*
* @param attr [IN] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param numValues [IN] number of values
* @param values [IN] pointer to values
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues,
const int64_t *values);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is list of floats
*
* @param attr [IN] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param numValues [IN] number of values
* @param values [IN] pointer to values
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues,
const float *values);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is list of strings
*
* @param attr [IN] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param numValues [IN] number of values
* @param values [IN] pointer to values
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues,
const char **values);

/**
* @ingroup AscendCL
* @brief set an attribute. the type of the attribute is list of list of ints
*
* @param attr [IN] pointer to the instance of aclopAttr
* @param attrName [IN] attribute name
* @param numLists [IN] number of lists
* @param numValues [IN] pointer to number of values of each list
* @param values [IN] pointer to values
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr,
const char *attrName,
int numLists,
const int *numValues,
const int64_t *const values[]);

/**
* @ingroup AscendCL
* @brief Load and execute the specified operator asynchronously
*
* @par Restriction
* @li The input and output organization of each operator is different,
* and the application needs to organize the operator strictly
* according to the operator input and output parameters when calling.
* @li When the user calls aclopExecute,
* the ACL finds the corresponding task according to the optype,
* the description of the input tensor,
* the description of the output tensor, and attr, and issues the execution.
* @param opType [IN] type of op
* @param numInputs [IN] number of inputs
* @param inputDesc [IN] pointer to array of input tensor descriptions
* @param inputs [IN] pointer to array of input buffers
* @param numOutputs [IN] number of outputs
* @param outputDesc [IN] pointer to array of output tensor descriptions
* @param outputs [OUT] pointer to array of output buffers
* @param attr [IN] pointer to instance of aclopAttr.
* may pass nullptr if the op has no attribute
* @param stream [IN] stream
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
const aclDataBuffer *const inputs[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
aclDataBuffer *const outputs[],
const aclopAttr *attr,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create an instance of aclopHandle.
*
* @param opType [IN] type of op
* @param numInputs [IN] number of inputs
* @param inputDesc [IN] pointer to array of input tensor descriptions
* @param numOutputs [IN] number of outputs
* @param outputDesc [IN] pointer to array of output tensor descriptions
* @param opAttr [IN] pointer to instance of aclopAttr.
* may pass nullptr if the op has no attribute
* @param handle [OUT] pointer to the pointer to the handle
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *opAttr,
aclopHandle **handle);

/**
* @ingroup AscendCL
* @brief destroy aclopHandle instance
*
* @param handle [IN] pointer to the instance of aclopHandle
*/
ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle);

/**
* @ingroup AscendCL
* @brief execute an op with the handle.
* can save op model matching cost compared with aclopExecute
*
* @param handle [IN] pointer to the instance of aclopHandle.
* The aclopCreateHandle interface has been called
* in advance to create aclopHandle type data.
* @param numInputs [IN] number of inputs
* @param inputs [IN] pointer to array of input buffers.
* The aclCreateDataBuffer interface has been called
* in advance to create aclDataBuffer type data.
* @param numOutputs [IN] number of outputs
* @param outputs [OUT] pointer to array of output buffers
* @param stream [IN] stream
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclopCreateHandle | aclCreateDataBuffer
*/
ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle,
int numInputs,
const aclDataBuffer *const inputs[],
int numOutputs,
aclDataBuffer *const outputs[],
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief cast data type
*
* @param srcDesc [IN] source tensor desc
* @param srcBuffer [IN] source tensor buffer
* @param dstDesc [IN] destination tensor desc
* @param dstBuffer [OUT] destination tensor buffer
* @param truncate [IN] do not truncate if value is 0, truncate otherwise
* @param stream [IN] stream
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc,
const aclDataBuffer *srcBuffer,
const aclTensorDesc *dstDesc,
aclDataBuffer *dstBuffer,
uint8_t truncate,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create a handle for casting datatype
*
* @param srcDesc [IN] source tensor desc
* @param dstDesc [IN] destination tensor desc
* @param truncate [IN] do not truncate if value is 0, truncate otherwise
* @param handle [OUT] pointer to the pointer to the handle
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc,
aclTensorDesc *dstDesc,
uint8_t truncate,
aclopHandle **handle);


/**
* @ingroup AscendCL
* @brief create kernel
*
* @param opType [IN] op type
* @param kernelId [IN] kernel id
* @param kernelName [IN] kernel name
* @param binData [IN] kernel bin data
* @param binSize [IN] kernel bin size
* @param enginetype [IN] engine type
* @param deallocator [IN] callback function for deallocating bin data,
* null if bin data to be deallocated by caller
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclopCompile
*/
ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType,
const char *kernelId,
const char *kernelName,
void *binData,
int binSize,
aclopEngineType enginetype,
aclDataDeallocator deallocator);


/**
* @ingroup AscendCL
* @brief prototype of an op compile function, as registered with aclopRegisterCompileFunc
*
* @param numInputs [IN] number of inputs
* @param inputDesc [IN] pointer to array of input tensor descriptions
* @param numOutputs [IN] number of outputs
* @param outputDesc [IN] pointer to array of output tensor descriptions
* @param opAttr [IN] pointer to instance of aclopAttr
* @param aclopKernelDesc [IN] pointer to instance of aclopKernelDesc
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
typedef aclError (*aclopCompileFunc)(int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *opAttr,
aclopKernelDesc *aclopKernelDesc);

/**
* @ingroup AscendCL
* @brief register compile function
*
* @param opType [IN] op type
* @param func [IN] compile function
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclopUnregisterCompileFunc
*/
ACL_FUNC_VISIBILITY aclError aclopRegisterCompileFunc(const char *opType, aclopCompileFunc func);

/**
* @ingroup AscendCL
* @brief unregister compile function
*
* @param opType [IN] op type
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType);

/**
* @ingroup AscendCL
* @brief set kernel args
*
* @param kernelDesc [IN] pointer to instance of aclopKernelDesc
* @param kernelId [IN] kernel id
* @param blockDim [IN] block dim
* @param args [IN] args
* @param argSize [IN] size in bytes of args
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc,
const char *kernelId,
uint32_t blockDim,
const void *args,
uint32_t argSize);

/**
* @ingroup AscendCL
* @brief set workspace sizes
*
* @param kernelDesc [IN] pointer to instance of aclopKernelDesc
* @param numWorkspaces [IN] number of workspaces
* @param workspaceSizes [IN] pointer to array of sizes of workspaces
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kernelDesc, int numWorkspaces,
size_t *workspaceSizes);

/**
* @ingroup AscendCL
* @brief compile op with dynamic shape
*
* @param opType [IN] op type
* @param numInputs [IN] number of inputs
* @param inputDesc [IN] pointer to array of input tensor descriptions
* @param numOutputs [IN] number of outputs
* @param outputDesc [IN] pointer to array of output tensor descriptions
* @param attr [IN] pointer to instance of aclopAttr.
* may pass nullptr if the op has no attribute
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType,
int numInputs,
const aclTensorDesc *const inputDesc[],
int numOutputs,
const aclTensorDesc *const outputDesc[],
const aclopAttr *attr);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_OP_H_

+ 656
- 0
dnn/atlas-stub/include/acl/acl_rt.h View File

@@ -0,0 +1,656 @@
/**
* @file acl_rt.h
*
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/

#ifndef INC_EXTERNAL_ACL_ACL_RT_H_
#define INC_EXTERNAL_ACL_ACL_RT_H_

#include <stdint.h>
#include <stddef.h>
#include "acl_base.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
* @ingroup AscendCL
* @brief Run mode written by aclrtGetRunMode through its output parameter.
*
* No explicit values are assigned, so ACL_DEVICE is 0 and ACL_HOST is 1.
* NOTE(review): presumably tells whether the calling software runs on the
* device side or the host side -- confirm against the AscendCL documentation.
*/
typedef enum aclrtRunMode {
ACL_DEVICE,
ACL_HOST,
} aclrtRunMode;

/**
* @ingroup AscendCL
* @brief TS id passed to aclrtSetTsDevice to set the scheduling TS.
*
* NOTE(review): "TS" is not expanded in this header; the enumerator names
* suggest AI Core and AI Vector targets -- confirm against the AscendCL
* documentation.
*/
typedef enum aclrtTsId {
ACL_TS_ID_AICORE = 0,
ACL_TS_ID_AIVECTOR = 1,
ACL_TS_ID_RESERVED = 2,
} aclrtTsId;

/**
* @ingroup AscendCL
* @brief Event status written by aclrtQueryEvent through its status
* output parameter.
*/
typedef enum aclrtEventStatus {
ACL_EVENT_STATUS_COMPLETE = 0,
ACL_EVENT_STATUS_NOT_READY = 1,
ACL_EVENT_STATUS_RESERVED = 2,
} aclrtEventStatus;

/**
* @ingroup AscendCL
* @brief Callback block type passed as the blockType argument of
* aclrtLaunchCallback.
*
* No explicit values are assigned, so ACL_CALLBACK_NO_BLOCK is 0 and
* ACL_CALLBACK_BLOCK is 1.
* NOTE(review): presumably selects whether the stream is blocked until the
* host callback has finished -- confirm against the AscendCL documentation.
*/
typedef enum aclrtCallbackBlockType {
ACL_CALLBACK_NO_BLOCK,
ACL_CALLBACK_BLOCK,
} aclrtCallbackBlockType;

/**
* @ingroup AscendCL
* @brief Memcpy type passed as the kind argument of aclrtMemcpy and
* aclrtMemcpyAsync.
*
* No explicit values are assigned, so the enumerators take the values
* 0 to 3 in declaration order. Each name is of the form
* ACL_MEMCPY_<source>_TO_<destination>.
*/
typedef enum aclrtMemcpyKind {
ACL_MEMCPY_HOST_TO_HOST,
ACL_MEMCPY_HOST_TO_DEVICE,
ACL_MEMCPY_DEVICE_TO_HOST,
ACL_MEMCPY_DEVICE_TO_DEVICE,
} aclrtMemcpyKind;

/**
* @ingroup AscendCL
* @brief Memory alloc policy passed as the policy argument of aclrtMalloc.
*
* No explicit values are assigned, so the enumerators take the values
* 0 to 2 in declaration order.
* NOTE(review): the names suggest a choice between huge-page and normal-page
* memory (HUGE_FIRST presumably falls back to normal pages) -- confirm
* against the AscendCL documentation.
*/
typedef enum aclrtMemMallocPolicy {
ACL_MEM_MALLOC_HUGE_FIRST,
ACL_MEM_MALLOC_HUGE_ONLY,
ACL_MEM_MALLOC_NORMAL_ONLY,
} aclrtMemMallocPolicy;

/**
* @ingroup AscendCL
* @brief Exception information handed to an aclrtExceptionInfoCallback.
*
* Alias of struct rtExceptionInfo, which is only named here and not defined
* in the visible part of this header. Its contents are read through
* aclrtGetTaskIdFromExceptionInfo, aclrtGetStreamIdFromExceptionInfo and
* aclrtGetThreadIdFromExceptionInfo.
*/
typedef struct rtExceptionInfo aclrtExceptionInfo;

/**
* @ingroup AscendCL
* @brief Callback function type accepted by aclrtLaunchCallback.
*
* @param userData user data supplied to aclrtLaunchCallback together
* with the callback
*/
typedef void (*aclrtCallback)(void *userData);

/**
* @ingroup AscendCL
* @brief Callback function type accepted by aclrtSetExceptionInfoCallback
* to handle exception information.
*
* @param exceptionInfo pointer of exception information
*/
typedef void (*aclrtExceptionInfoCallback)(aclrtExceptionInfo *exceptionInfo);

/**
* @ingroup AscendCL
* @brief Set a callback function to handle exception information
*
* @param callback [IN] callback function to handle exception information
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSetExceptionInfoCallback(aclrtExceptionInfoCallback callback);

/**
* @ingroup AscendCL
* @brief Get task id from exception information
*
* @param info [IN] pointer of exception information
* @retval The task id from exception information
* @retval 0xFFFFFFFF if info is null
*/
ACL_FUNC_VISIBILITY uint32_t aclrtGetTaskIdFromExceptionInfo(const aclrtExceptionInfo *info);

/**
* @ingroup AscendCL
* @brief Get stream id from exception information
*
* @param info [IN] pointer of exception information
* @retval The stream id from exception information
* @retval 0xFFFFFFFF if info is null
*/
ACL_FUNC_VISIBILITY uint32_t aclrtGetStreamIdFromExceptionInfo(const aclrtExceptionInfo *info);

/**
* @ingroup AscendCL
* @brief Get thread id from exception information
*
* @param info [IN] pointer of exception information
* @retval The thread id of the failed task
* @retval 0xFFFFFFFF if info is null
*/
ACL_FUNC_VISIBILITY uint32_t aclrtGetThreadIdFromExceptionInfo(const aclrtExceptionInfo *info);

/**
* @ingroup AscendCL
* @brief The thread that handles the callback function on the Stream
*
* @param threadId [IN] thread ID
* @param stream [IN] stream handle
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSubscribeReport(uint64_t threadId, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Add a callback function to be executed on the host
* to the task queue of the Stream
*
* @param fn [IN] Specify the callback function to be added
* The function prototype of the callback function is:
* typedef void (*aclrtCallback)(void *userData);
* @param userData [IN] User data to be passed to the callback function
* @param blockType [IN] callback block type
* @param stream [IN] stream handle
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtLaunchCallback(aclrtCallback fn, void *userData, aclrtCallbackBlockType blockType,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief After waiting for a specified time, trigger callback processing
*
* @par Function
* The thread processing callback specified by
* the aclrtSubscribeReport interface
* @param timeout [IN] timeout value
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSubscribeReport
*/
ACL_FUNC_VISIBILITY aclError aclrtProcessReport(int32_t timeout);

/**
* @ingroup AscendCL
* @brief Cancel thread registration,
* the callback function on the specified Stream
* is no longer processed by the specified thread
*
* @param threadId [IN] thread ID
* @param stream [IN] stream handle
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtUnSubscribeReport(uint64_t threadId, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create context and associates it with the calling thread
*
* @par Function
* The following use cases are supported:
* @li If you don't call the aclrtCreateContext interface
* to explicitly create the context,
* the system will use the default context, which is implicitly created
* when the aclrtSetDevice interface is called.
* @li If multiple contexts are created in a process
* (there is no limit on the number of contexts),
* the current thread can only use one of them at the same time.
* It is recommended to explicitly specify the context of the current thread
* through the aclrtSetCurrentContext interface to increase
* the maintainability of the program.
* @param context [OUT] point to the created context
* @param deviceId [IN] device to create context on
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSetDevice | aclrtSetCurrentContext
*/
ACL_FUNC_VISIBILITY aclError aclrtCreateContext(aclrtContext *context, int32_t deviceId);

/**
* @ingroup AscendCL
* @brief destroy context instance
*
* @par Function
* Can only destroy context created through aclrtCreateContext interface
* @param context [IN] the context to destroy
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateContext
*/
ACL_FUNC_VISIBILITY aclError aclrtDestroyContext(aclrtContext context);

/**
* @ingroup AscendCL
* @brief set the context of the thread
*
* @par Function
* The following scenarios are supported:
* @li If the aclrtCreateContext interface is called in a thread to explicitly
* create a Context (for example: ctx1), the thread's Context can be specified
* without calling the aclrtSetCurrentContext interface.
* The system uses ctx1 as the context of thread1 by default.
* @li If the aclrtCreateContext interface is not explicitly created,
* the system uses the default context as the context of the thread.
* At this time, the aclrtDestroyContext interface cannot be used to release
* the default context.
* @li If the aclrtSetCurrentContext interface is called multiple times to
* set the thread's Context, the last one prevails.
*
* @par Restriction
* @li If the device corresponding to the context set for the thread
* has been reset, you cannot set the context as the context of the thread,
* otherwise a business exception will result.
* @li It is recommended to use the context created in a thread.
* If the aclrtCreateContext interface is called in thread A to create a context,
* and the context is used in thread B,
* the user must guarantee the execution order of tasks in the same stream
* under the same context in two threads.
* @param context [IN] the current context of the thread
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateContext | aclrtDestroyContext
*/
ACL_FUNC_VISIBILITY aclError aclrtSetCurrentContext(aclrtContext context);

/**
* @ingroup AscendCL
* @brief get the context of the thread
*
* @par Function
* If the user calls the aclrtSetCurrentContext interface
* multiple times to set the context of the current thread,
* then the last set context is obtained
* @param context [OUT] the current context of the thread
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSetCurrentContext
*/
ACL_FUNC_VISIBILITY aclError aclrtGetCurrentContext(aclrtContext *context);

/**
* @ingroup AscendCL
* @brief Specify the device to use for the operation
* implicitly create the default context and the default stream
*
* @par Function
* The following use cases are supported:
* @li Device can be specified in the process or thread.
* If you call the aclrtSetDevice interface multiple
* times to specify the same device,
* you only need to call the aclrtResetDevice interface to reset the device.
* @li The same device can be specified for operation
* in different processes or threads.
* @li Device is specified in a process,
* and multiple threads in the process can share this device to explicitly
* create a Context (aclrtCreateContext interface).
* @li In multi-device scenarios, you can switch to other devices
* through the aclrtSetDevice interface in the process.
* @param deviceId [IN] the device id
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtResetDevice | aclrtCreateContext
*/
ACL_FUNC_VISIBILITY aclError aclrtSetDevice(int32_t deviceId);

/**
* @ingroup AscendCL
* @brief Reset the current operating Device and free resources on the device,
* including the default context, the default stream,
* and all streams created under the default context,
* and synchronizes the interface.
* If the task under the default context or stream has not been completed,
* the system will wait for the task to complete before releasing it.
*
* @par Restriction
* @li If a Context, Stream, or Event has been explicitly created
* on the device to be reset, it is recommended to follow
* the interface calling sequence below before resetting,
* otherwise business abnormalities may be caused.
* @li Interface calling sequence:
* call aclrtDestroyEvent interface to release Event or
* call aclrtDestroyStream interface to release explicitly created Stream->
* call aclrtDestroyContext to release explicitly created Context->
* call aclrtResetDevice interface
* @param deviceId [IN] the device id
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtResetDevice(int32_t deviceId);

/**
* @ingroup AscendCL
* @brief get target device of current thread
*
* @param deviceId [OUT] the device id
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtGetDevice(int32_t *deviceId);

/**
* @ingroup AscendCL
* @brief get target side
*
* @param runMode [OUT] the run mode
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtGetRunMode(aclrtRunMode *runMode);

/**
* @ingroup AscendCL
* @brief Wait for compute device to finish
*
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSynchronizeDevice(void);

/**
* @ingroup AscendCL
* @brief Set Scheduling TS
*
* @param tsId [IN] the ts id
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSetTsDevice(aclrtTsId tsId);

/**
* @ingroup AscendCL
* @brief get total device number.
*
* @param count [IN|OUT] the device number
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtGetDeviceCount(uint32_t *count);

/**
* @ingroup AscendCL
* @brief create event instance
*
* @param event [OUT] created event
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtCreateEvent(aclrtEvent *event);

/**
* @ingroup AscendCL
* @brief destroy event instance
*
* @par Function
* Only events created through the aclrtCreateEvent interface can be
* destroyed. This is a synchronous interface. When destroying an event,
* the user must ensure that the tasks involved in the aclrtSynchronizeEvent
* interface or the aclrtStreamWaitEvent interface are completed before
* the event is destroyed.
* @param event [IN] event to destroy
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateEvent | aclrtSynchronizeEvent | aclrtStreamWaitEvent
*/
ACL_FUNC_VISIBILITY aclError aclrtDestroyEvent(aclrtEvent event);

/**
* @ingroup AscendCL
* @brief Record an Event in the Stream
*
* @param event [IN] event to record
* @param stream [IN] stream handle
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Reset an event
*
* @par Function
* Users need to make sure to wait for the tasks in the Stream
* to complete before resetting the Event
* @param event [IN] event to reset
* @param stream [IN] stream handle
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Queries an event's status
*
* @param event [IN] event to query
* @param status [OUT] event status
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status);

/**
* @ingroup AscendCL
* @brief Block Host Running, wait event to be complete
*
* @param event [IN] event to wait
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event);

/**
* @ingroup AscendCL
* @brief computes the elapsed time between events.
*
* @param ms [OUT] time between start and end in ms
* @param start [IN] starting event
* @param end [IN] ending event
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end);

/**
* @ingroup AscendCL
* @brief alloc memory on device
*
* @par Function
* alloc for size linear memory on device
* and return a pointer to allocated memory by *devPtr
*
* @par Restriction
* @li The memory requested by the aclrtMalloc interface needs to be released
* through the aclrtFree interface.
* @li Before calling the media data processing interface,
* if you need to apply memory on the device to store input or output data,
* you need to call acldvppMalloc to apply for memory.
* @param devPtr [IN|OUT] pointer to pointer to allocated memory on device
* @param size [IN] alloc memory size
* @param policy [IN] memory alloc policy
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtFree | acldvppMalloc
*/
ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr,
size_t size,
aclrtMemMallocPolicy policy);

/**
* @ingroup AscendCL
* @brief free device memory
*
* @par Function
* can only free memory allocated through the aclrtMalloc interface
* @param devPtr [IN] Pointer to memory to be freed
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtMalloc
*/
ACL_FUNC_VISIBILITY aclError aclrtFree(void *devPtr);

/**
* @ingroup AscendCL
* @brief alloc memory on host
*
* @par Restriction
* @li The requested memory cannot be used in the Device
* and needs to be explicitly copied to the Device.
* @li The memory requested by the aclrtMallocHost interface
* needs to be released through the aclrtFreeHost interface.
* @param hostPtr [IN|OUT] pointer to pointer to allocated memory on the host
* @param size [IN] alloc memory size
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtFreeHost
*/
ACL_FUNC_VISIBILITY aclError aclrtMallocHost(void **hostPtr, size_t size);

/**
* @ingroup AscendCL
* @brief free host memory
*
* @par Function
* can only free memory allocated through the aclrtMallocHost interface
* @param hostPtr [IN] free memory pointer
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtMallocHost
*/
ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr);

/**
* @ingroup AscendCL
* @brief synchronous memory replication between host and device
*
* @param dst [IN] destination address pointer
* @param destMax [IN] Max length of the destination address memory
* @param src [IN] source address pointer
* @param count [IN] the number of bytes to copy
* @param kind [IN] memcpy type
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst,
size_t destMax,
const void *src,
size_t count,
aclrtMemcpyKind kind);

/**
* @ingroup AscendCL
* @brief Initialize memory and set contents of memory to specified value
*
* @par Function
* The memory to be initialized is on the Host or device side,
* and the system determines whether
* it is host or device according to the address
* @param devPtr [IN] Starting address of memory
* @param maxCount [IN] Max length of destination address memory
* @param value [IN] Set value
* @param count [IN] The length of memory
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t value, size_t count);

/**
* @ingroup AscendCL
* @brief Asynchronous memory replication between Host and Device
*
* @par Function
* After calling this interface,
* be sure to call the aclrtSynchronizeStream interface to ensure that
* the task of memory replication has been completed
*
* @par Restriction
* @li For on-chip Device-to-Device memory copy,
* both the source and destination addresses must be 64-byte aligned
* @param dst [IN] destination address pointer
* @param destMax [IN] Max length of destination address memory
* @param src [IN] source address pointer
* @param count [IN] the number of bytes to copy
* @param kind [IN] memcpy type
* @param stream [IN] asynchronized task stream
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst,
size_t destMax,
const void *src,
size_t count,
aclrtMemcpyKind kind,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Asynchronous initialize memory
* and set contents of memory to specified value async
*
* @par Function
* The memory to be initialized is on the Host or device side,
* and the system determines whether
* it is host or device according to the address
* @param devPtr [IN] destination address pointer
* @param maxCount [IN] Max length of destination address memory
* @param value [IN] set value
* @param count [IN] the number of bytes to set
* @param stream [IN] asynchronized task stream
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr,
size_t maxCount,
int32_t value,
size_t count,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create stream instance
*
* @param stream [OUT] the created stream
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtCreateStream(aclrtStream *stream);

/**
* @ingroup AscendCL
* @brief destroy stream instance
*
* @par Function
* Can only destroy streams created through the aclrtCreateStream interface
*
* @par Restriction
* Before calling the aclrtDestroyStream interface to destroy
* the specified Stream, you need to call the aclrtSynchronizeStream interface
* to ensure that the tasks in the Stream have been completed.
* @param stream [IN] the stream to destroy
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclrtCreateStream | aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtDestroyStream(aclrtStream stream);

/**
* @ingroup AscendCL
* @brief block the host until all tasks
* in the specified stream have completed
*
* @param stream [IN] the stream to wait
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtSynchronizeStream(aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Blocks the operation of the specified Stream until
* the specified Event is completed.
* Support for multiple streams waiting for the same event.
*
* @param stream [IN] the wait stream. If using the default Stream, set NULL
* @param event [IN] the event to wait
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtStreamWaitEvent(aclrtStream stream, aclrtEvent event);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_RT_H_


+ 413
- 0
dnn/atlas-stub/include/acl/ops/acl_cblas.h View File

@@ -0,0 +1,413 @@
/**
* @file acl_cblas.h
*
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#ifndef INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_
#define INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_

#include "../acl.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
* @ingroup AscendCL
* @brief Transpose type of a matrix, passed as the transA / transB / transC
* arguments of the aclblas functions declared in this header.
*
* No explicit values are assigned, so the enumerators take the values
* 0 to 3 in declaration order.
* NOTE(review): ACL_TRANS_N / ACL_TRANS_T presumably mean not transposed /
* transposed, and the NZ variants presumably refer to a device-specific
* matrix layout -- confirm against the AscendCL documentation.
*/
typedef enum aclTransType {
ACL_TRANS_N,
ACL_TRANS_T,
ACL_TRANS_NZ,
ACL_TRANS_NZ_T
} aclTransType;

/**
* @ingroup AscendCL
* @brief Computation type, passed as the type argument of the aclblas
* functions declared in this header.
*
* No explicit values are assigned, so ACL_COMPUTE_HIGH_PRECISION is 0 and
* ACL_COMPUTE_LOW_PRECISION is 1.
*/
typedef enum aclComputeType {
ACL_COMPUTE_HIGH_PRECISION,
ACL_COMPUTE_LOW_PRECISION
} aclComputeType;

/**
* @ingroup AscendCL
* @brief perform the matrix-vector multiplication
*
* @param transA [IN] transpose type of matrix A
* @param m [IN] number of rows of matrix A
* @param n [IN] number of columns of matrix A
* @param alpha [IN] pointer to scalar used for multiplication,
* of same type as dataTypeY
* @param a [IN] pointer to matrix A
* @param lda [IN] leading dimension used to store the matrix A
* @param dataTypeA [IN] datatype of matrix A
* @param x [IN] pointer to vector x
* @param incx [IN] stride between consecutive elements of vector x
* @param dataTypeX [IN] datatype of vector x
* @param beta [IN] pointer to scalar used for multiplication,
* of same type as dataTypeY. If beta == 0,
* then y does not have to be a valid input
* @param y [IN|OUT] pointer to vector y
* @param incy [IN] stride between consecutive elements of vector y
* @param dataTypeY [IN] datatype of vector y
* @param type [IN] computation type
* @param stream [IN] stream
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n,
const void *alpha, const void *a, int lda, aclDataType dataTypeA,
const void *x, int incx, aclDataType dataTypeX,
const void *beta, void *y, int incy, aclDataType dataTypeY,
aclComputeType type, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create a handle for performing the matrix-vector multiplication
*
* @param transA [IN] transpose type of matrix A
* @param m [IN] number of rows of matrix A
* @param n [IN] number of columns of matrix A
* @param dataTypeA [IN] datatype of matrix A
* @param dataTypeX [IN] datatype of vector x
* @param dataTypeY [IN] datatype of vector y
* @param type [IN] computation type
* @param handle [OUT] pointer to the pointer to the handle
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA,
int m,
int n,
aclDataType dataTypeA,
aclDataType dataTypeX,
aclDataType dataTypeY,
aclComputeType type,
aclopHandle **handle);

/**
* @ingroup AscendCL
* @brief perform the matrix-vector multiplication
*
* @param transA [IN] transpose type of matrix A
* @param m [IN] number of rows of matrix A
* @param n [IN] number of columns of matrix A
* @param alpha [IN] pointer to scalar used for multiplication
* @param a [IN] pointer to matrix A
* @param lda [IN] leading dimension used to store the matrix A
* @param x [IN] pointer to vector x
* @param incx [IN] stride between consecutive elements of vector x
* @param beta [IN] pointer to scalar used for multiplication.
* If beta value == 0,
* then y does not have to be a valid input
* @param y [IN|OUT] pointer to vector y
* @param incy [IN] stride between consecutive elements of vector y
* @param type [IN] computation type
* @param stream [IN] stream
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA,
int m,
int n,
const aclFloat16 *alpha,
const aclFloat16 *a,
int lda,
const aclFloat16 *x,
int incx,
const aclFloat16 *beta,
aclFloat16 *y,
int incy,
aclComputeType type,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create a handle for performing the matrix-vector multiplication
*
* @param transA [IN] transpose type of matrix A
* @param m [IN] number of rows of matrix A
* @param n [IN] number of columns of matrix A
* @param type [IN] computation type
* @param handle [OUT] pointer to the pointer to the handle
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA,
int m,
int n,
aclComputeType type,
aclopHandle **handle);

/**
* @ingroup AscendCL
* @brief perform the matrix-vector multiplication
*
* @param transA [IN] transpose type of matrix A
* @param m [IN] number of rows of matrix A
* @param n [IN] number of columns of matrix A
* @param alpha [IN] pointer to scalar used for multiplication
* @param a [IN] pointer to matrix A
* @param lda [IN] leading dimension used to store the matrix A
* @param x [IN] pointer to vector x
* @param incx [IN] stride between consecutive elements of vector x
* @param beta [IN] pointer to scalar used for multiplication.
* If beta value == 0,
* then y does not have to be a valid input
* @param y [IN|OUT] pointer to vector y
* @param incy [IN] stride between consecutive elements of vector y
* @param type [IN] computation type
* @param stream [IN] stream
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA,
int m,
int n,
const int32_t *alpha,
const int8_t *a,
int lda,
const int8_t *x,
int incx,
const int32_t *beta,
int32_t *y,
int incy,
aclComputeType type,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create a handle for performing the matrix-vector multiplication
*
* @param transA [IN] transpose type of matrix A
* @param m [IN] number of rows of matrix A
* @param n [IN] number of columns of matrix A
* @param type [IN] computation type
* @param handle [OUT] pointer to the pointer to the handle
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA,
int m,
int n,
aclComputeType type,
aclopHandle **handle);

/**
* @ingroup AscendCL
* @brief perform the matrix-matrix multiplication
*
* @param transA [IN] transpose type of matrix A
* @param transB [IN] transpose type of matrix B
* @param transC [IN] transpose type of matrix C
* @param m [IN] number of rows of matrix A and matrix C
* @param n [IN] number of columns of matrix B and matrix C
* @param k [IN] number of columns of matrix A and rows of matrix B
* @param alpha [IN] pointer to scalar used for multiplication, of same type as dataTypeC
* @param matrixA [IN] pointer to matrix A
* @param lda [IN] leading dimension array used to store matrix A
* @param dataTypeA [IN] datatype of matrix A
* @param matrixB [IN] pointer to matrix B
* @param ldb [IN] leading dimension array used to store matrix B
* @param dataTypeB [IN] datatype of matrix B
* @param beta [IN] pointer to scalar used for multiplication,
* of same type as dataTypeC. If beta == 0,
* then matrixC does not have to be a valid input
* @param matrixC [IN|OUT] pointer to matrix C
* @param ldc [IN] leading dimension array used to store matrix C
* @param dataTypeC [IN] datatype of matrix C
* @param type [IN] computation type
* @param stream [IN] stream
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const void *alpha,
const void *matrixA,
int lda,
aclDataType dataTypeA,
const void *matrixB,
int ldb,
aclDataType dataTypeB,
const void *beta,
void *matrixC,
int ldc,
aclDataType dataTypeC,
aclComputeType type,
aclrtStream stream);


/**
* @ingroup AscendCL
* @brief create a handle for performing the matrix-matrix multiplication
*
* @param transA [IN] transpose type of matrix A
* @param transB [IN] transpose type of matrix B
* @param transC [IN] transpose type of matrix C
* @param m [IN] number of rows of matrix A and matrix C
* @param n [IN] number of columns of matrix B and matrix C
* @param k [IN] number of columns of matrix A and rows of matrix B
* @param dataTypeA [IN] datatype of matrix A
* @param dataTypeB [IN] datatype of matrix B
* @param dataTypeC [IN] datatype of matrix C
* @param type [IN] computation type
* @param handle [OUT] pointer to the pointer to the handle
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclDataType dataTypeA,
aclDataType dataTypeB,
aclDataType dataTypeC,
aclComputeType type,
aclopHandle **handle);


/**
* @ingroup AscendCL
* @brief perform the matrix-matrix multiplication on aclFloat16 data
*
* @param transA [IN] transpose type of matrix A
* @param transB [IN] transpose type of matrix B
* @param transC [IN] transpose type of matrix C
* @param m [IN] number of rows of matrix A and matrix C
* @param n [IN] number of columns of matrix B and matrix C
* @param k [IN] number of columns of matrix A and rows of matrix B
* @param alpha [IN] pointer to the aclFloat16 scalar used for multiplication
* @param matrixA [IN] pointer to matrix A (aclFloat16 elements)
* @param lda [IN] leading dimension used to store the matrix A
* @param matrixB [IN] pointer to matrix B (aclFloat16 elements)
* @param ldb [IN] leading dimension used to store the matrix B
* @param beta [IN] pointer to the aclFloat16 scalar used for multiplication.
* If beta value == 0,
* then matrixC does not have to be a valid input
* @param matrixC [IN|OUT] pointer to matrix C (aclFloat16 elements)
* @param ldc [IN] leading dimension used to store the matrix C
* @param type [IN] computation type
* @param stream [IN] stream
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const aclFloat16 *alpha,
const aclFloat16 *matrixA,
int lda,
const aclFloat16 *matrixB,
int ldb,
const aclFloat16 *beta,
aclFloat16 *matrixC,
int ldc,
aclComputeType type,
aclrtStream stream);

/**
* @ingroup AscendCL
* @brief create a handle for performing the matrix-matrix multiplication
*
* @param transA [IN] transpose type of matrix A
* @param transB [IN] transpose type of matrix B
* @param transC [IN] transpose type of matrix C
* @param m [IN] number of rows of matrix A and matrix C
* @param n [IN] number of columns of matrix B and matrix C
* @param k [IN] number of columns of matrix A and rows of matrix B
* @param type [IN] computation type
* @param handle [OUT] pointer to the pointer to the handle;
* receives the created aclopHandle
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclComputeType type,
aclopHandle **handle);

/**
* @ingroup AscendCL
* @brief perform the matrix-matrix multiplication with int8_t input
* matrices and an int32_t result matrix
*
* @param transA [IN] transpose type of matrix A
* @param transB [IN] transpose type of matrix B
* @param transC [IN] transpose type of matrix C
* @param m [IN] number of rows of matrix A and matrix C
* @param n [IN] number of columns of matrix B and matrix C
* @param k [IN] number of columns of matrix A and rows of matrix B
* @param alpha [IN] pointer to the int32_t scalar used for multiplication
* @param matrixA [IN] pointer to matrix A (int8_t elements)
* @param lda [IN] leading dimension used to store the matrix A
* @param matrixB [IN] pointer to matrix B (int8_t elements)
* @param ldb [IN] leading dimension used to store the matrix B
* @param beta [IN] pointer to the int32_t scalar used for multiplication.
* If beta value == 0,
* then matrixC does not have to be a valid input
* @param matrixC [IN|OUT] pointer to matrix C (int32_t elements)
* @param ldc [IN] leading dimension used to store the matrix C
* @param type [IN] computation type
* @param stream [IN] stream
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
const int32_t *alpha,
const int8_t *matrixA,
int lda,
const int8_t *matrixB,
int ldb,
const int32_t *beta,
int32_t *matrixC,
int ldc,
aclComputeType type,
aclrtStream stream);


/**
* @ingroup AscendCL
* @brief create a handle for performing the matrix-matrix multiplication
*
* @param transA [IN] transpose type of matrix A
* @param transB [IN] transpose type of matrix B
* @param transC [IN] transpose type of matrix C
* @param m [IN] number of rows of matrix A and matrix C
* @param n [IN] number of columns of matrix B and matrix C
* @param k [IN] number of columns of matrix A and rows of matrix B
* @param type [IN] computation type
* @param handle [OUT] pointer to the pointer to the handle;
* receives the created aclopHandle
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA,
aclTransType transB,
aclTransType transC,
int m,
int n,
int k,
aclComputeType type,
aclopHandle **handle);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_

+ 1675
- 0
dnn/atlas-stub/include/acl/ops/acl_dvpp.h
File diff suppressed because it is too large
View File


+ 261
- 0
dnn/atlas-stub/include/acl/ops/acl_fv.h View File

@@ -0,0 +1,261 @@
/* *
* @file acl_fv.h
*
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#ifndef INC_EXTERNAL_ACL_OPS_ACL_RETR_H_
#define INC_EXTERNAL_ACL_OPS_ACL_RETR_H_

#include "../acl.h"

#ifdef __cplusplus
extern "C" {
#endif

// Opaque types: only forward-declared in this header, so callers handle them
// exclusively through pointers returned by / passed to the aclfv* functions.
typedef struct aclfvFeatureInfo aclfvFeatureInfo;
typedef struct aclfvRepoRange aclfvRepoRange;
typedef struct aclfvQueryTable aclfvQueryTable;
typedef struct aclfvSearchInput aclfvSearchInput;
typedef struct aclfvSearchResult aclfvSearchResult;

// search operation type; passed as the `type` argument of aclfvRepoAdd,
// aclfvRepoDel and aclfvSearch
enum aclfvSearchType {
SEARCH_1_N, // 1:N operation type
SEARCH_N_M // N:M operation type
};

/* *
* @ingroup AscendCL
* @brief Create fv feature info.
* @param id0 [IN]: The first level library id0
* @param id1 [IN]: Secondary library id1
* @param offset [IN]: The offset of the first feature in the library
* @param featureLen [IN]: Single feature length
* @param featureCount [IN]: Single feature count
* @param featureData [IN/OUT]: Feature value list
* @param featureDataLen [IN]: Feature value list length
* @retval null on failure.
* @retval OtherValues pointer to the created aclfvFeatureInfo on success.
*
* @see aclfvDestroyFeatureInfo
*/
ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset,
uint32_t featureLen, uint32_t featureCount, uint8_t *featureData, uint32_t featureDataLen);

/* *
* @ingroup AscendCL
* @brief Destroy fv feature info.
*
* @par Function
* Can only destroy fv feature info created
* through the aclfvCreateFeatureInfo interface.
* @param featureInfo [IN/OUT] fv feature info.
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclfvCreateFeatureInfo
*/
ACL_FUNC_VISIBILITY aclError aclfvDestroyFeatureInfo(aclfvFeatureInfo *featureInfo);

/* *
* @ingroup AscendCL
* @brief Create fv repo range.
* @param id0Min [IN]: id0 start value
* @param id0Max [IN]: id0 max
* @param id1Min [IN]: id1 start value
* @param id1Max [IN]: id1 max
* @retval null on failure. OtherValues: pointer to the created aclfvRepoRange
*
* @see aclfvDestroyRepoRange
*/
ACL_FUNC_VISIBILITY aclfvRepoRange *aclfvCreateRepoRange(uint32_t id0Min, uint32_t id0Max, uint32_t id1Min,
uint32_t id1Max);

/* *
* @ingroup AscendCL
* @brief Destroy fv repo range.
*
* @par Function
* Can only destroy fv repo range created
* through the aclfvCreateRepoRange interface.
* @param repoRange [IN/OUT] fv repo range.
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclfvCreateRepoRange
*/
ACL_FUNC_VISIBILITY aclError aclfvDestroyRepoRange(aclfvRepoRange *repoRange);

/* *
* @ingroup AscendCL
* @brief Create query table.
* @param queryCnt [IN]: Number of tables, the maximum number is 6
* @param tableLen [IN]: Single table length, table length is 32KB
* @param tableData [IN/OUT]: Feature value list
* @param tableDataLen [IN]: The length of memory requested by the tableData
* pointer
* @retval null on failure. OtherValues: pointer to the created aclfvQueryTable
*
* @see aclfvDestroyQueryTable
*/
ACL_FUNC_VISIBILITY aclfvQueryTable *aclfvCreateQueryTable(uint32_t queryCnt, uint32_t tableLen, uint8_t *tableData,
uint32_t tableDataLen);

/* *
* @ingroup AscendCL
* @brief Destroy query table.
*
* @par Function
* Can only destroy a query table created
* through the aclfvCreateQueryTable interface.
* @param queryTable [IN/OUT] query table.
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclfvCreateQueryTable
*/
ACL_FUNC_VISIBILITY aclError aclfvDestroyQueryTable(aclfvQueryTable *queryTable);

/* *
* @ingroup AscendCL
* @brief Create search input.
* @param queryTable [IN/OUT]: query table
* @param repoRange [IN/OUT]: query repo range
* @param topk [IN]: query topk
* @retval null on failure. OtherValues: pointer to the created aclfvSearchInput
*
* @see aclfvDestroySearchInput
*/
ACL_FUNC_VISIBILITY aclfvSearchInput *aclfvCreateSearchInput(aclfvQueryTable *queryTable, aclfvRepoRange *repoRange,
uint32_t topk);

/* *
* @ingroup AscendCL
* @brief Destroy search input.
*
* @par Function
* Can only destroy a search input created
* through the aclfvCreateSearchInput interface.
* @param searchInput [IN/OUT] search input.
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclfvCreateSearchInput
*/
ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInput);

/* *
* @ingroup AscendCL
* @brief Create search result.
* @param queryCnt [IN]: Retrieve the number of features
* @param resultNum [IN/OUT]: The number of search results for each feature, the
* number is queryCnt
* @param resultNumDataLen [IN]: resultNum memory length
* @param id0 [IN/OUT]: Level 1 library id0
* @param id1 [IN/OUT]: Secondary library id1
* @param resultOffset [IN/OUT]: The offset of the bottom library corresponding
* to each feature retrieval result, total length topK * queryCnt
* @param resultDistance [IN/OUT]: Distance, total length topK * queryCnt
* @param dataLen [IN]: The memory size requested by
* id0/id1/resultOffset/resultDistance
* @retval null on failure. OtherValues: pointer to the created aclfvSearchResult
*
* @see aclfvDestroySearchResult
*/
ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum,
uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, uint32_t *resultOffset, float *resultDistance,
uint32_t dataLen);

/* *
* @ingroup AscendCL
* @brief Destroy search result.
*
* @par Function
* Can only destroy a search result created
* through the aclfvCreateSearchResult interface.
* @param searchResult [IN/OUT] search result.
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure
*
* @see aclfvCreateSearchResult
*/
ACL_FUNC_VISIBILITY aclError aclfvDestroySearchResult(aclfvSearchResult *searchResult);

/* *
* @ingroup AscendCL
* @brief fv IP initialize.
*
* @param fsNum [IN] max repo num, used to apply for memory.
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure.
*
* @see aclfvRelease
*/
ACL_FUNC_VISIBILITY aclError aclfvInit(uint64_t fsNum);

/* *
* @ingroup AscendCL
* @brief release fv resources.
*
* @par Function
* Can only release fv resources created
* through the aclfvInit interface.
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure.
*
* @see aclfvInit
*
* NOTE(review): declared with an empty parameter list `()` rather than
* `(void)`; when this header is compiled as C (before C23) that does not
* form a prototype, so argument checking is disabled for this function.
*/
ACL_FUNC_VISIBILITY aclError aclfvRelease();

/* *
* @ingroup AscendCL
* @brief fv repo add.
* @param type [IN]: repo add type (an aclfvSearchType value)
* @param featureInfo [IN/OUT]: add feature information
* @param stream [IN]: stream of task execute
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure.
*/
ACL_FUNC_VISIBILITY aclError aclfvRepoAdd(aclfvSearchType type, aclfvFeatureInfo *featureInfo, aclrtStream stream);

/* *
* @ingroup AscendCL
* @brief fv repo del.
* @param type [IN]: repo delete type (an aclfvSearchType value)
* @param repoRange [IN/OUT]: repo range information
* @param stream [IN]: stream of task execute
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure.
*/
ACL_FUNC_VISIBILITY aclError aclfvRepoDel(aclfvSearchType type, aclfvRepoRange *repoRange, aclrtStream stream);

/* *
* @ingroup AscendCL
* @brief fv accurate del.
* @param featureInfo [IN/OUT]: feature information identifying what to
* delete accurately
* @param stream [IN]: stream of task execute
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure.
*/
ACL_FUNC_VISIBILITY aclError aclfvDel(aclfvFeatureInfo *featureInfo, aclrtStream stream);

/* *
* @ingroup AscendCL
* @brief fv accurate modify.
* @param featureInfo [IN/OUT]: feature information identifying what to
* modify accurately
* @param stream [IN]: stream of task execute
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure.
*/
ACL_FUNC_VISIBILITY aclError aclfvModify(aclfvFeatureInfo *featureInfo, aclrtStream stream);

/* *
* @ingroup AscendCL
* @brief fv search.
* @param type [IN]: search type (an aclfvSearchType value)
* @param searchInput [IN/OUT]: search input
* @param searchRst [IN/OUT]: search result
* @param stream [IN]: stream of task execute
* @retval ACL_ERROR_NONE The function is successfully executed.
* @retval OtherValues Failure.
*/
ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput *searchInput,
aclfvSearchResult *searchRst, aclrtStream stream);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_

+ 94
- 0
dnn/atlas-stub/src/libacl_cblas-wrap.cpp View File

@@ -0,0 +1,94 @@
/**
* \file dnn/atlas-stub/src/libacl_cblas-wrap.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#pragma GCC visibility push(default)

#include <cstdio>
// Print a printf-style error message to stderr, prefixed with "err: " and
// terminated by a newline. `fmt` must be a string literal because it is
// concatenated with the prefix/suffix at compile time.
// Uses the standard `...` / __VA_ARGS__ form instead of the GNU-only named
// variadic parameter (`v...`); `##` drops the trailing comma when no
// arguments follow `fmt`.
#define LOGE(fmt, ...) fprintf(stderr, "err: " fmt "\n", ##__VA_ARGS__)

#include "acl/acl.h"
#include "acl/ops/acl_cblas.h"

#pragma GCC diagnostic ignored "-Wdeprecated-declarations"

#if defined(_WIN32)
#include <windows.h>
#define RTLD_LAZY 0

static void* dlopen(const char* file, int) {
return static_cast<void*>(LoadLibraryA(file));
}

// Windows stand-in for POSIX dlerror(): there is no per-call error text
// here, so a fixed explanatory message is returned. The returned pointer
// refers to a string literal and must not be modified or freed.
static void* dlerror() {
    const char* errmsg = "dlerror not available in windows";
    return const_cast<char*>(errmsg);
}

static void* dlsym(void* handle, const char* name) {
FARPROC symbol = GetProcAddress((HMODULE)handle, name);
return reinterpret_cast<void*>(symbol);
}

#else
#include <dlfcn.h>
#include <unistd.h>
#endif

static void log_failed_load(int func_idx);
namespace {
// Value returned by a wrapped API when its symbol could not be loaded;
// specialised per return type.
template <typename T>
T on_init_failed(int func_idx);

// aclError-returning APIs: log which function is missing and report an
// internal error to the caller.
template <>
aclError on_init_failed<aclError>(int func_idx) {
    log_failed_load(func_idx);
    return ACL_ERROR_INTERNAL_ERROR;
}
}  // namespace

#include "./libacl_cblas-wrap.h"

// Candidate locations of the cblas shared library. get_library_handle()
// passes them to dlopen() in this order and stops at the first one that
// loads: the fixed install path first, then the bare file name.
static const char* default_so_paths[] = {
"/usr/local/Ascend/acllib/lib64/libacl_cblas.so",
"libacl_cblas.so",
};

// Try each entry of default_so_paths in order and return the handle of the
// first library that dlopen() accepts. Logs an error and returns nullptr
// when none of them can be loaded.
static void* get_library_handle() {
    for (const char* so_path : default_so_paths) {
        if (void* lib = dlopen(so_path, RTLD_LAZY)) {
            return lib;
        }
    }

    LOGE("Failed to load atlas library");
    return nullptr;
}

// Report on stderr that the function at index `func_idx` of g_func_name
// could not be loaded.
static void log_failed_load(int func_idx) {
    const char* missing = g_func_name[func_idx];
    LOGE("failed to load atlas func: %s", missing);
}

// Look up symbol `func` in the library behind `handle`.
// Returns the symbol address, or nullptr (after logging) when `handle` is
// null or the symbol is not found.
static void* resolve_library_func(void* handle, const char* func) {
    if (handle == nullptr) {
        LOGE("handle should not be nullptr!");
        return nullptr;
    }

    void* symbol = dlsym(handle, func);
    if (symbol == nullptr) {
        LOGE("failed to load atlas func: %s", func);
    }
    return symbol;
}

+ 238
- 0
dnn/atlas-stub/src/libacl_cblas-wrap.h View File

@@ -0,0 +1,238 @@
// generated by wraplib.py
// --- begin functions to be implemented
#ifndef _WRAPLIB_API_CALL
#define _WRAPLIB_API_CALL
#endif
#ifndef _WRAPLIB_CALLBACK
#define _WRAPLIB_CALLBACK
#endif
#ifndef ON_ENTRY
#define ON_ENTRY(x)
#endif
static void* get_library_handle();
static void* resolve_library_func(void* , const char*);
namespace {
// Supplied by the including source file: produces the value an API of return
// type T reports when its slot `func_idx` could not be resolved. Called by
// every *_error stub below.
template<typename T> T on_init_failed(int func_idx);
}
// --- end functions to be implemented
#include <mutex>
#include <cstddef>
static void load_library();
// Initial target of slot 0: runs load_library() and then re-enters the public
// wrapper, which dispatches through the (now updated) table entry.
static aclError _WRAPLIB_API_CALL aclblasGemvEx_init(
        aclTransType arg0, int arg1, int arg2, const void *arg3, const void *arg4, int arg5,
        aclDataType arg6, const void *arg7, int arg8, aclDataType arg9, const void *arg10,
        void *arg11, int arg12, aclDataType arg13, aclComputeType arg14, aclrtStream arg15) {
    load_library();
    const aclError status = aclblasGemvEx(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8,
                                          arg9, arg10, arg11, arg12, arg13, arg14, arg15);
    return status;
}
// Target of slot 0 when the symbol could not be resolved.
static aclError _WRAPLIB_API_CALL aclblasGemvEx_error(
        aclTransType, int, int, const void *, const void *, int, aclDataType, const void *, int,
        aclDataType, const void *, void *, int, aclDataType, aclComputeType, aclrtStream) {
    constexpr int func_idx = 0;
    return on_init_failed<aclError>(func_idx);
}
// Initial target of slot 1: runs load_library() and then re-enters the public
// wrapper, which dispatches through the (now updated) table entry.
static aclError _WRAPLIB_API_CALL aclblasCreateHandleForGemvEx_init(
        aclTransType arg0, int arg1, int arg2, aclDataType arg3, aclDataType arg4,
        aclDataType arg5, aclComputeType arg6, aclopHandle **arg7) {
    load_library();
    const aclError status =
            aclblasCreateHandleForGemvEx(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
    return status;
}
// Target of slot 1 when the symbol could not be resolved.
static aclError _WRAPLIB_API_CALL aclblasCreateHandleForGemvEx_error(
        aclTransType, int, int, aclDataType, aclDataType, aclDataType, aclComputeType,
        aclopHandle **) {
    constexpr int func_idx = 1;
    return on_init_failed<aclError>(func_idx);
}
// Initial target of slot 2: runs load_library() and then re-enters the public
// wrapper, which dispatches through the (now updated) table entry.
static aclError _WRAPLIB_API_CALL aclblasHgemv_init(
        aclTransType arg0, int arg1, int arg2, const aclFloat16 *arg3, const aclFloat16 *arg4,
        int arg5, const aclFloat16 *arg6, int arg7, const aclFloat16 *arg8, aclFloat16 *arg9,
        int arg10, aclComputeType arg11, aclrtStream arg12) {
    load_library();
    const aclError status = aclblasHgemv(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8,
                                         arg9, arg10, arg11, arg12);
    return status;
}
// Target of slot 2 when the symbol could not be resolved.
static aclError _WRAPLIB_API_CALL aclblasHgemv_error(
        aclTransType, int, int, const aclFloat16 *, const aclFloat16 *, int, const aclFloat16 *,
        int, const aclFloat16 *, aclFloat16 *, int, aclComputeType, aclrtStream) {
    constexpr int func_idx = 2;
    return on_init_failed<aclError>(func_idx);
}
// Initial target of slot 3: runs load_library() and then re-enters the public
// wrapper, which dispatches through the (now updated) table entry.
static aclError _WRAPLIB_API_CALL aclblasCreateHandleForHgemv_init(
        aclTransType arg0, int arg1, int arg2, aclComputeType arg3, aclopHandle **arg4) {
    load_library();
    const aclError status = aclblasCreateHandleForHgemv(arg0, arg1, arg2, arg3, arg4);
    return status;
}
// Target of slot 3 when the symbol could not be resolved.
static aclError _WRAPLIB_API_CALL aclblasCreateHandleForHgemv_error(
        aclTransType, int, int, aclComputeType, aclopHandle **) {
    constexpr int func_idx = 3;
    return on_init_failed<aclError>(func_idx);
}
// Initial target of slot 4: runs load_library() and then re-enters the public
// wrapper, which dispatches through the (now updated) table entry.
static aclError _WRAPLIB_API_CALL aclblasS8gemv_init(
        aclTransType arg0, int arg1, int arg2, const int32_t *arg3, const int8_t *arg4, int arg5,
        const int8_t *arg6, int arg7, const int32_t *arg8, int32_t *arg9, int arg10,
        aclComputeType arg11, aclrtStream arg12) {
    load_library();
    const aclError status = aclblasS8gemv(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8,
                                          arg9, arg10, arg11, arg12);
    return status;
}
// Target of slot 4 when the symbol could not be resolved.
static aclError _WRAPLIB_API_CALL aclblasS8gemv_error(
        aclTransType, int, int, const int32_t *, const int8_t *, int, const int8_t *, int,
        const int32_t *, int32_t *, int, aclComputeType, aclrtStream) {
    constexpr int func_idx = 4;
    return on_init_failed<aclError>(func_idx);
}
// Initial target of slot 5: runs load_library() and then re-enters the public
// wrapper, which dispatches through the (now updated) table entry.
static aclError _WRAPLIB_API_CALL aclblasCreateHandleForS8gemv_init(
        aclTransType arg0, int arg1, int arg2, aclComputeType arg3, aclopHandle **arg4) {
    load_library();
    const aclError status = aclblasCreateHandleForS8gemv(arg0, arg1, arg2, arg3, arg4);
    return status;
}
// Target of slot 5 when the symbol could not be resolved.
static aclError _WRAPLIB_API_CALL aclblasCreateHandleForS8gemv_error(
        aclTransType, int, int, aclComputeType, aclopHandle **) {
    constexpr int func_idx = 5;
    return on_init_failed<aclError>(func_idx);
}
// Initial target of slot 6: runs load_library() and then re-enters the public
// wrapper, which dispatches through the (now updated) table entry.
static aclError _WRAPLIB_API_CALL aclblasGemmEx_init(
        aclTransType arg0, aclTransType arg1, aclTransType arg2, int arg3, int arg4, int arg5,
        const void *arg6, const void *arg7, int arg8, aclDataType arg9, const void *arg10,
        int arg11, aclDataType arg12, const void *arg13, void *arg14, int arg15,
        aclDataType arg16, aclComputeType arg17, aclrtStream arg18) {
    load_library();
    const aclError status =
            aclblasGemmEx(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10,
                          arg11, arg12, arg13, arg14, arg15, arg16, arg17, arg18);
    return status;
}
// Target of slot 6 when the symbol could not be resolved.
static aclError _WRAPLIB_API_CALL aclblasGemmEx_error(
        aclTransType, aclTransType, aclTransType, int, int, int, const void *, const void *, int,
        aclDataType, const void *, int, aclDataType, const void *, void *, int, aclDataType,
        aclComputeType, aclrtStream) {
    constexpr int func_idx = 6;
    return on_init_failed<aclError>(func_idx);
}
// Initial target of slot 7: runs load_library() and then re-enters the public
// wrapper, which dispatches through the (now updated) table entry.
static aclError _WRAPLIB_API_CALL aclblasCreateHandleForGemmEx_init(
        aclTransType arg0, aclTransType arg1, aclTransType arg2, int arg3, int arg4, int arg5,
        aclDataType arg6, aclDataType arg7, aclDataType arg8, aclComputeType arg9,
        aclopHandle **arg10) {
    load_library();
    const aclError status = aclblasCreateHandleForGemmEx(arg0, arg1, arg2, arg3, arg4, arg5,
                                                         arg6, arg7, arg8, arg9, arg10);
    return status;
}
// Target of slot 7 when the symbol could not be resolved.
static aclError _WRAPLIB_API_CALL aclblasCreateHandleForGemmEx_error(
        aclTransType, aclTransType, aclTransType, int, int, int, aclDataType, aclDataType,
        aclDataType, aclComputeType, aclopHandle **) {
    constexpr int func_idx = 7;
    return on_init_failed<aclError>(func_idx);
}
// Initial target of slot 8: runs load_library() and then re-enters the public
// wrapper, which dispatches through the (now updated) table entry.
static aclError _WRAPLIB_API_CALL aclblasHgemm_init(
        aclTransType arg0, aclTransType arg1, aclTransType arg2, int arg3, int arg4, int arg5,
        const aclFloat16 *arg6, const aclFloat16 *arg7, int arg8, const aclFloat16 *arg9,
        int arg10, const aclFloat16 *arg11, aclFloat16 *arg12, int arg13, aclComputeType arg14,
        aclrtStream arg15) {
    load_library();
    const aclError status = aclblasHgemm(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8,
                                         arg9, arg10, arg11, arg12, arg13, arg14, arg15);
    return status;
}
// Target of slot 8 when the symbol could not be resolved.
static aclError _WRAPLIB_API_CALL aclblasHgemm_error(
        aclTransType, aclTransType, aclTransType, int, int, int, const aclFloat16 *,
        const aclFloat16 *, int, const aclFloat16 *, int, const aclFloat16 *, aclFloat16 *, int,
        aclComputeType, aclrtStream) {
    constexpr int func_idx = 8;
    return on_init_failed<aclError>(func_idx);
}
// Initial target of slot 9: runs load_library() and then re-enters the public
// wrapper, which dispatches through the (now updated) table entry.
static aclError _WRAPLIB_API_CALL aclblasCreateHandleForHgemm_init(
        aclTransType arg0, aclTransType arg1, aclTransType arg2, int arg3, int arg4, int arg5,
        aclComputeType arg6, aclopHandle **arg7) {
    load_library();
    const aclError status =
            aclblasCreateHandleForHgemm(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
    return status;
}
// Target of slot 9 when the symbol could not be resolved.
static aclError _WRAPLIB_API_CALL aclblasCreateHandleForHgemm_error(
        aclTransType, aclTransType, aclTransType, int, int, int, aclComputeType,
        aclopHandle **) {
    constexpr int func_idx = 9;
    return on_init_failed<aclError>(func_idx);
}
// Initial target of slot 10: runs load_library() and then re-enters the public
// wrapper, which dispatches through the (now updated) table entry.
static aclError _WRAPLIB_API_CALL aclblasS8gemm_init(
        aclTransType arg0, aclTransType arg1, aclTransType arg2, int arg3, int arg4, int arg5,
        const int32_t *arg6, const int8_t *arg7, int arg8, const int8_t *arg9, int arg10,
        const int32_t *arg11, int32_t *arg12, int arg13, aclComputeType arg14,
        aclrtStream arg15) {
    load_library();
    const aclError status = aclblasS8gemm(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8,
                                          arg9, arg10, arg11, arg12, arg13, arg14, arg15);
    return status;
}
// Target of slot 10 when the symbol could not be resolved.
static aclError _WRAPLIB_API_CALL aclblasS8gemm_error(
        aclTransType, aclTransType, aclTransType, int, int, int, const int32_t *, const int8_t *,
        int, const int8_t *, int, const int32_t *, int32_t *, int, aclComputeType, aclrtStream) {
    constexpr int func_idx = 10;
    return on_init_failed<aclError>(func_idx);
}
// Initial target of slot 11: runs load_library() and then re-enters the public
// wrapper, which dispatches through the (now updated) table entry.
static aclError _WRAPLIB_API_CALL aclblasCreateHandleForS8gemm_init(
        aclTransType arg0, aclTransType arg1, aclTransType arg2, int arg3, int arg4, int arg5,
        aclComputeType arg6, aclopHandle **arg7) {
    load_library();
    const aclError status =
            aclblasCreateHandleForS8gemm(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
    return status;
}
// Target of slot 11 when the symbol could not be resolved.
static aclError _WRAPLIB_API_CALL aclblasCreateHandleForS8gemm_error(
        aclTransType, aclTransType, aclTransType, int, int, int, aclComputeType,
        aclopHandle **) {
    constexpr int func_idx = 11;
    return on_init_failed<aclError>(func_idx);
}
// Number of wrapped cblas entry points; all three tables below share this
// length and the same index order.
static constexpr size_t NR_FUNC = 12;
// Live dispatch table read by the public wrappers. Every slot starts out
// pointing at the matching *_init trampoline and is overwritten by
// load_library() with either the resolved symbol or the *_error stub.
static void* g_func_table[NR_FUNC] = {(void*)(&aclblasGemvEx_init),
(void*)(&aclblasCreateHandleForGemvEx_init),
(void*)(&aclblasHgemv_init),
(void*)(&aclblasCreateHandleForHgemv_init),
(void*)(&aclblasS8gemv_init),
(void*)(&aclblasCreateHandleForS8gemv_init),
(void*)(&aclblasGemmEx_init),
(void*)(&aclblasCreateHandleForGemmEx_init),
(void*)(&aclblasHgemm_init),
(void*)(&aclblasCreateHandleForHgemm_init),
(void*)(&aclblasS8gemm_init),
(void*)(&aclblasCreateHandleForS8gemm_init)};
// Fallback targets: load_library() copies entry i into g_func_table[i] when
// the library or the symbol for slot i is unavailable.
static void* g_func_table_error[NR_FUNC] = {(void*)(&aclblasGemvEx_error),
(void*)(&aclblasCreateHandleForGemvEx_error),
(void*)(&aclblasHgemv_error),
(void*)(&aclblasCreateHandleForHgemv_error),
(void*)(&aclblasS8gemv_error),
(void*)(&aclblasCreateHandleForS8gemv_error),
(void*)(&aclblasGemmEx_error),
(void*)(&aclblasCreateHandleForGemmEx_error),
(void*)(&aclblasHgemm_error),
(void*)(&aclblasCreateHandleForHgemm_error),
(void*)(&aclblasS8gemm_error),
(void*)(&aclblasCreateHandleForS8gemm_error)};
// Symbol names passed to resolve_library_func(); the index of a name is the
// slot number hard-coded in the corresponding wrapper and *_error stub.
static const char* const g_func_name[NR_FUNC] = {"aclblasGemvEx",
"aclblasCreateHandleForGemvEx",
"aclblasHgemv",
"aclblasCreateHandleForHgemv",
"aclblasS8gemv",
"aclblasCreateHandleForS8gemv",
"aclblasGemmEx",
"aclblasCreateHandleForGemmEx",
"aclblasHgemm",
"aclblasCreateHandleForHgemm",
"aclblasS8gemm",
"aclblasCreateHandleForS8gemm"};

// Fill g_func_table with the real library entry points (or error stubs).
// Called by every *_init trampoline; the work is done only on the first call.
static void load_library() {
// `done` is only read and written while `mtx` is held, so concurrent first
// calls are serialised and later callers return early below.
static bool done = false;
static std::mutex mtx;
std::lock_guard<std::mutex> lg{mtx};

if (done)
return;

// A null handle means the library could not be opened; every slot then falls
// back to its error stub.
void* handle = get_library_handle();
for (size_t i = 0; i < NR_FUNC; ++i) {
void* func;
if (!handle) {
func = nullptr;
} else {
func = resolve_library_func(handle, g_func_name[i]);
}
// Missing symbol (or missing library): route the slot to the stub that
// reports the failure via on_init_failed().
if (!func) {
func = g_func_table_error[i];
}
// NOTE(review): the store is atomic (relaxed), but the public wrappers read
// g_func_table[...] with plain loads — confirm this is acceptable on all
// supported targets.
__atomic_store_n(g_func_table + i, func, __ATOMIC_RELAXED);
}
done = true;
}

// Public entry point: forwards all arguments to whatever slot 0 of
// g_func_table currently points at.
aclError _WRAPLIB_API_CALL aclblasGemvEx(aclTransType arg0, int arg1, int arg2, const void *arg3, const void *arg4, int arg5, aclDataType arg6, const void *arg7, int arg8, aclDataType arg9, const void *arg10, void *arg11, int arg12, aclDataType arg13, aclComputeType arg14, aclrtStream arg15) {
    using target_fn_t = aclError (_WRAPLIB_API_CALL *)(
            aclTransType, int, int, const void *, const void *, int, aclDataType, const void *,
            int, aclDataType, const void *, void *, int, aclDataType, aclComputeType,
            aclrtStream);
    ON_ENTRY(aclblasGemvEx);
    return reinterpret_cast<target_fn_t>(g_func_table[0])(
            arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11, arg12,
            arg13, arg14, arg15);
}
// Public entry point: forwards all arguments to whatever slot 1 of
// g_func_table currently points at.
aclError _WRAPLIB_API_CALL aclblasCreateHandleForGemvEx(aclTransType arg0, int arg1, int arg2, aclDataType arg3, aclDataType arg4, aclDataType arg5, aclComputeType arg6, aclopHandle **arg7) {
    using target_fn_t = aclError (_WRAPLIB_API_CALL *)(
            aclTransType, int, int, aclDataType, aclDataType, aclDataType, aclComputeType,
            aclopHandle **);
    ON_ENTRY(aclblasCreateHandleForGemvEx);
    return reinterpret_cast<target_fn_t>(g_func_table[1])(
            arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
}
// Public entry point: forwards all arguments to whatever slot 2 of
// g_func_table currently points at.
aclError _WRAPLIB_API_CALL aclblasHgemv(aclTransType arg0, int arg1, int arg2, const aclFloat16 *arg3, const aclFloat16 *arg4, int arg5, const aclFloat16 *arg6, int arg7, const aclFloat16 *arg8, aclFloat16 *arg9, int arg10, aclComputeType arg11, aclrtStream arg12) {
    using target_fn_t = aclError (_WRAPLIB_API_CALL *)(
            aclTransType, int, int, const aclFloat16 *, const aclFloat16 *, int,
            const aclFloat16 *, int, const aclFloat16 *, aclFloat16 *, int, aclComputeType,
            aclrtStream);
    ON_ENTRY(aclblasHgemv);
    return reinterpret_cast<target_fn_t>(g_func_table[2])(
            arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11, arg12);
}
// Public entry point: forwards all arguments to whatever slot 3 of
// g_func_table currently points at.
aclError _WRAPLIB_API_CALL aclblasCreateHandleForHgemv(aclTransType arg0, int arg1, int arg2, aclComputeType arg3, aclopHandle **arg4) {
    using target_fn_t = aclError (_WRAPLIB_API_CALL *)(
            aclTransType, int, int, aclComputeType, aclopHandle **);
    ON_ENTRY(aclblasCreateHandleForHgemv);
    return reinterpret_cast<target_fn_t>(g_func_table[3])(arg0, arg1, arg2, arg3, arg4);
}
// Public entry point: forwards all arguments to whatever slot 4 of
// g_func_table currently points at.
aclError _WRAPLIB_API_CALL aclblasS8gemv(aclTransType arg0, int arg1, int arg2, const int32_t *arg3, const int8_t *arg4, int arg5, const int8_t *arg6, int arg7, const int32_t *arg8, int32_t *arg9, int arg10, aclComputeType arg11, aclrtStream arg12) {
    using target_fn_t = aclError (_WRAPLIB_API_CALL *)(
            aclTransType, int, int, const int32_t *, const int8_t *, int, const int8_t *, int,
            const int32_t *, int32_t *, int, aclComputeType, aclrtStream);
    ON_ENTRY(aclblasS8gemv);
    return reinterpret_cast<target_fn_t>(g_func_table[4])(
            arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11, arg12);
}
// Public entry point: forwards all arguments to whatever slot 5 of
// g_func_table currently points at.
aclError _WRAPLIB_API_CALL aclblasCreateHandleForS8gemv(aclTransType arg0, int arg1, int arg2, aclComputeType arg3, aclopHandle **arg4) {
    using target_fn_t = aclError (_WRAPLIB_API_CALL *)(
            aclTransType, int, int, aclComputeType, aclopHandle **);
    ON_ENTRY(aclblasCreateHandleForS8gemv);
    return reinterpret_cast<target_fn_t>(g_func_table[5])(arg0, arg1, arg2, arg3, arg4);
}
// Public entry point: forwards all arguments to whatever slot 6 of
// g_func_table currently points at.
aclError _WRAPLIB_API_CALL aclblasGemmEx(aclTransType arg0, aclTransType arg1, aclTransType arg2, int arg3, int arg4, int arg5, const void *arg6, const void *arg7, int arg8, aclDataType arg9, const void *arg10, int arg11, aclDataType arg12, const void *arg13, void *arg14, int arg15, aclDataType arg16, aclComputeType arg17, aclrtStream arg18) {
    using target_fn_t = aclError (_WRAPLIB_API_CALL *)(
            aclTransType, aclTransType, aclTransType, int, int, int, const void *, const void *,
            int, aclDataType, const void *, int, aclDataType, const void *, void *, int,
            aclDataType, aclComputeType, aclrtStream);
    ON_ENTRY(aclblasGemmEx);
    return reinterpret_cast<target_fn_t>(g_func_table[6])(
            arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11, arg12,
            arg13, arg14, arg15, arg16, arg17, arg18);
}
// Public entry point: forwards all arguments to whatever slot 7 of
// g_func_table currently points at.
aclError _WRAPLIB_API_CALL aclblasCreateHandleForGemmEx(aclTransType arg0, aclTransType arg1, aclTransType arg2, int arg3, int arg4, int arg5, aclDataType arg6, aclDataType arg7, aclDataType arg8, aclComputeType arg9, aclopHandle **arg10) {
    using target_fn_t = aclError (_WRAPLIB_API_CALL *)(
            aclTransType, aclTransType, aclTransType, int, int, int, aclDataType, aclDataType,
            aclDataType, aclComputeType, aclopHandle **);
    ON_ENTRY(aclblasCreateHandleForGemmEx);
    return reinterpret_cast<target_fn_t>(g_func_table[7])(
            arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10);
}
// Public entry point: forwards all arguments to whatever slot 8 of
// g_func_table currently points at.
aclError _WRAPLIB_API_CALL aclblasHgemm(aclTransType arg0, aclTransType arg1, aclTransType arg2, int arg3, int arg4, int arg5, const aclFloat16 *arg6, const aclFloat16 *arg7, int arg8, const aclFloat16 *arg9, int arg10, const aclFloat16 *arg11, aclFloat16 *arg12, int arg13, aclComputeType arg14, aclrtStream arg15) {
    using target_fn_t = aclError (_WRAPLIB_API_CALL *)(
            aclTransType, aclTransType, aclTransType, int, int, int, const aclFloat16 *,
            const aclFloat16 *, int, const aclFloat16 *, int, const aclFloat16 *, aclFloat16 *,
            int, aclComputeType, aclrtStream);
    ON_ENTRY(aclblasHgemm);
    return reinterpret_cast<target_fn_t>(g_func_table[8])(
            arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11, arg12,
            arg13, arg14, arg15);
}
// Public entry point: forwards all arguments to whatever slot 9 of
// g_func_table currently points at.
aclError _WRAPLIB_API_CALL aclblasCreateHandleForHgemm(aclTransType arg0, aclTransType arg1, aclTransType arg2, int arg3, int arg4, int arg5, aclComputeType arg6, aclopHandle **arg7) {
    using target_fn_t = aclError (_WRAPLIB_API_CALL *)(
            aclTransType, aclTransType, aclTransType, int, int, int, aclComputeType,
            aclopHandle **);
    ON_ENTRY(aclblasCreateHandleForHgemm);
    return reinterpret_cast<target_fn_t>(g_func_table[9])(
            arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
}
// Public entry point: forwards all arguments to whatever slot 10 of
// g_func_table currently points at.
aclError _WRAPLIB_API_CALL aclblasS8gemm(aclTransType arg0, aclTransType arg1, aclTransType arg2, int arg3, int arg4, int arg5, const int32_t *arg6, const int8_t *arg7, int arg8, const int8_t *arg9, int arg10, const int32_t *arg11, int32_t *arg12, int arg13, aclComputeType arg14, aclrtStream arg15) {
    using target_fn_t = aclError (_WRAPLIB_API_CALL *)(
            aclTransType, aclTransType, aclTransType, int, int, int, const int32_t *,
            const int8_t *, int, const int8_t *, int, const int32_t *, int32_t *, int,
            aclComputeType, aclrtStream);
    ON_ENTRY(aclblasS8gemm);
    return reinterpret_cast<target_fn_t>(g_func_table[10])(
            arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11, arg12,
            arg13, arg14, arg15);
}
// Public entry point: forwards all arguments to whatever slot 11 of
// g_func_table currently points at.
aclError _WRAPLIB_API_CALL aclblasCreateHandleForS8gemm(aclTransType arg0, aclTransType arg1, aclTransType arg2, int arg3, int arg4, int arg5, aclComputeType arg6, aclopHandle **arg7) {
    using target_fn_t = aclError (_WRAPLIB_API_CALL *)(
            aclTransType, aclTransType, aclTransType, int, int, int, aclComputeType,
            aclopHandle **);
    ON_ENTRY(aclblasCreateHandleForS8gemm);
    return reinterpret_cast<target_fn_t>(g_func_table[11])(
            arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
}

+ 172
- 0
dnn/atlas-stub/src/libatlas-wrap.cpp View File

@@ -0,0 +1,172 @@
/**
* \file dnn/atlas-stub/src/libatlas-wrap.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#pragma GCC visibility push(default)

#include <cstdio>
// Print a printf-style error message to stderr, prefixed with "err: " and
// terminated by a newline. `fmt` must be a string literal because it is
// concatenated with the prefix/suffix at compile time.
// Uses the standard `...` / __VA_ARGS__ form instead of the GNU-only named
// variadic parameter (`v...`); `##` drops the trailing comma when no
// arguments follow `fmt`.
#define LOGE(fmt, ...) fprintf(stderr, "err: " fmt "\n", ##__VA_ARGS__)

#include "acl/acl.h"

#pragma GCC diagnostic ignored "-Wdeprecated-declarations"

#if defined(_WIN32)
#include <windows.h>
#define RTLD_LAZY 0

static void* dlopen(const char* file, int) {
return static_cast<void*>(LoadLibraryA(file));
}

// Windows stand-in for POSIX dlerror(): there is no per-call error text
// here, so a fixed explanatory message is returned. The returned pointer
// refers to a string literal and must not be modified or freed.
static void* dlerror() {
    const char* errmsg = "dlerror not available in windows";
    return const_cast<char*>(errmsg);
}

static void* dlsym(void* handle, const char* name) {
FARPROC symbol = GetProcAddress((HMODULE)handle, name);
return reinterpret_cast<void*>(symbol);
}

#else
#include <dlfcn.h>
#include <unistd.h>
#endif

static void log_failed_load(int func_idx);
namespace {
// Value returned by a wrapped API when its symbol could not be loaded.
// One explicit specialisation exists per return type used by the wrapped
// functions; each logs the missing function and yields a neutral/error value.
template <typename T>
T on_init_failed(int func_idx);

// --- arithmetic return types: report zero ---
template <>
float on_init_failed<float>(int func_idx) {
    log_failed_load(func_idx);
    return 0.f;
}

template <>
aclFloat16 on_init_failed<aclFloat16>(int func_idx) {
    log_failed_load(func_idx);
    return 0;
}

// --- object pointers: report nullptr ---
template <>
aclDataBuffer* on_init_failed<aclDataBuffer*>(int func_idx) {
    log_failed_load(func_idx);
    return nullptr;
}

// --- status codes: report an internal error ---
template <>
aclError on_init_failed<aclError>(int func_idx) {
    log_failed_load(func_idx);
    return ACL_ERROR_INTERNAL_ERROR;
}

template <>
void* on_init_failed<void*>(int func_idx) {
    log_failed_load(func_idx);
    return nullptr;
}

// NOTE(review): on a target where size_t and uint32_t are the same type, the
// next two specialisations would be duplicates — confirm only 64-bit builds
// are supported.
template <>
uint32_t on_init_failed<uint32_t>(int func_idx) {
    log_failed_load(func_idx);
    return 0;
}

template <>
size_t on_init_failed<size_t>(int func_idx) {
    log_failed_load(func_idx);
    return 0;
}

// --- no return value: only log ---
template <>
void on_init_failed<void>(int func_idx) {
    log_failed_load(func_idx);
}

template <>
int64_t on_init_failed<int64_t>(int func_idx) {
    log_failed_load(func_idx);
    return 0;
}

// --- strings: report a fixed diagnostic text ---
template <>
const char* on_init_failed<const char*>(int func_idx) {
    log_failed_load(func_idx);
    return "load lib failed";
}

template <>
aclopAttr* on_init_failed<aclopAttr*>(int func_idx) {
    log_failed_load(func_idx);
    return nullptr;
}

template <>
aclmdlDesc* on_init_failed<aclmdlDesc*>(int func_idx) {
    log_failed_load(func_idx);
    return nullptr;
}

template <>
aclmdlDataset* on_init_failed<aclmdlDataset*>(int func_idx) {
    log_failed_load(func_idx);
    return nullptr;
}

// --- enumerations: report the "undefined" enumerator ---
template <>
aclFormat on_init_failed<aclFormat>(int func_idx) {
    log_failed_load(func_idx);
    return ACL_FORMAT_UNDEFINED;
}

template <>
aclTensorDesc* on_init_failed<aclTensorDesc*>(int func_idx) {
    log_failed_load(func_idx);
    return nullptr;
}

template <>
aclDataType on_init_failed<aclDataType>(int func_idx) {
    log_failed_load(func_idx);
    return ACL_DT_UNDEFINED;
}

template <>
aclmdlAIPP* on_init_failed<aclmdlAIPP*>(int func_idx) {
    log_failed_load(func_idx);
    return nullptr;
}
}  // namespace

#include "./libatlas-wrap.h"

// Candidate locations for the Ascend CL shared library, tried in order by
// get_library_handle(): first an absolute install path, then the bare soname,
// which leaves the lookup to the dynamic loader's search path.
// Must remain a true array: the element count is computed with sizeof.
static const char* default_so_paths[] = {
"/usr/local/Ascend/acllib/lib64/libascendcl.so",
"libascendcl.so",
};

// Opens the Atlas runtime library.
// Walks default_so_paths in order and returns the handle from the first
// successful dlopen(). If none of the candidates can be opened, logs an error
// and returns nullptr.
static void* get_library_handle() {
    for (const char* so_path : default_so_paths) {
        if (void* lib = dlopen(so_path, RTLD_LAZY)) {
            return lib;
        }
    }

    LOGE("Failed to load atlas library");
    return nullptr;
}

// Reports, on stderr, the name of the wrapped function at index `func_idx`
// in g_func_name. The index is not range-checked here.
static void log_failed_load(int func_idx) {
    const char* missing_name = g_func_name[func_idx];
    LOGE("failed to load atlas func: %s", missing_name);
}

// Looks up symbol `func` in the library referred to by `handle`.
// Returns the symbol address, or nullptr (after logging) when `handle` is
// null or the symbol cannot be found.
static void* resolve_library_func(void* handle, const char* func) {
    if (handle == nullptr) {
        LOGE("handle should not be nullptr!");
        return nullptr;
    }

    void* symbol = dlsym(handle, func);
    if (symbol == nullptr) {
        LOGE("failed to load atlas func: %s", func);
    }
    return symbol;
}

+ 2286
- 0
dnn/atlas-stub/src/libatlas-wrap.h
File diff suppressed because it is too large
View File


+ 1
- 0
scripts/whl/manylinux2010/do_build.sh View File

@@ -57,6 +57,7 @@ do
export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DPYTHON_EXECUTABLE=${PYTHON_DIR}/bin/python3"
export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DPYTHON_LIBRARY=${PYTHON_DIR}lib/"
export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DPYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python${MAJOR}.${MINOR}"
export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DMGE_WITH_ATLAS=ON"

if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then
${SRC_DIR}/scripts/cmake-build/host_build.sh -c -t -r


Loading…
Cancel
Save